Skip to content

Commit ea2cde2

Browse files
committed
修改数据集添加和显示列
1 parent f744e78 commit ea2cde2

File tree

5 files changed

+95
-51
lines changed

5 files changed

+95
-51
lines changed

myapp/cli.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,10 @@ def add_project(project_type, name, describe, expand={}):
9797
add_project('job-template', __('数据预处理'), __('结构化话数据特征处理'), {"index": 3})
9898
add_project('job-template', __('数据处理工具'), __('数据的单机或分布式处理任务,ray/spark/hadoop/volcanojob'), {"index": 4})
9999
add_project('job-template', __('特征处理'), __('特征处理相关功能'), {"index": 5})
100+
add_project('job-template', __('图像处理'), __('图像处理相关功能'), {"index": 5.1})
101+
add_project('job-template', __('视频处理'), __('视频处理相关功能'), {"index": 5.2})
102+
add_project('job-template', __('音频处理'), __('音频处理相关功能'), {"index": 5.3})
103+
add_project('job-template', __('文本处理'), __('文本处理相关功能'), {"index": 5.4})
100104
add_project('job-template', __('机器学习框架'), __('传统机器学习框架,sklearn'), {"index": 6})
101105
add_project('job-template', __('机器学习算法'), __('传统机器学习,lr/决策树/gbdt/xgb/fm等'), {"index": 7})
102106
add_project('job-template', __('深度学习'), __('深度框架训练,tf/pytorch/mxnet/mpi/horovod/kaldi等'), {"index": 8})
@@ -452,7 +456,7 @@ def create_dataset(**kwargs):
452456
dataset = Dataset()
453457
dataset.name = kwargs['name']
454458
dataset.field = kwargs.get('field', '')
455-
dataset.version = 'latest'
459+
dataset.version = kwargs.get('version', 'latest')
456460
dataset.label = kwargs.get('label', '')
457461
dataset.status = kwargs.get('status', '')
458462
dataset.describe = kwargs.get('describe', '')
@@ -472,7 +476,8 @@ def create_dataset(**kwargs):
472476
dataset.storage_class = kwargs.get('storage_class', '')
473477
dataset.storage_size = kwargs.get('storage_size', '')
474478
dataset.download_url = kwargs.get('download_url', '')
475-
dataset.owner = 'admin'
479+
dataset.owner = kwargs.get('owner', 'admin')
480+
dataset.features = kwargs.get('features', '{}')
476481
dataset.created_by_fk = 1
477482
dataset.changed_by_fk = 1
478483
db.session.add(dataset)
@@ -631,7 +636,7 @@ def create_inference(project_name, service_name, service_describe, image_name, c
631636
from myapp.views.view_inferenceserving import InferenceService_ModelView_base
632637
inference_class = InferenceService_ModelView_base()
633638
inference_class.src_item_json = {}
634-
inference_class.pre_add(service)
639+
inference_class.use_expand(service)
635640

636641
db.session.add(service)
637642
db.session.commit()
@@ -756,10 +761,13 @@ def add_chat(chat_path):
756761
if not chat.id:
757762
db.session.add(chat)
758763
db.session.commit()
764+
print(f'add chat {name} success')
759765
except Exception as e:
760766
print(e)
761767
# traceback.print_exc()
762768

769+
# 添加chat
770+
# if conf.get('BABEL_DEFAULT_LOCALE','zh')=='zh':
763771
try:
764772
print('begin add chat')
765773
init_file = os.path.join(init_dir, 'init-chat.json')
@@ -768,7 +776,7 @@ def add_chat(chat_path):
768776
except Exception as e:
769777
print(e)
770778
# traceback.print_exc()
771-
# 添加chat
779+
772780
# if conf.get('BABEL_DEFAULT_LOCALE','zh')=='zh':
773781
try:
774782
SQLALCHEMY_DATABASE_URI = os.getenv('MYSQL_SERVICE', '')
@@ -819,6 +827,7 @@ def add_chat(chat_path):
819827
# traceback.print_exc()
820828
# 添加ETL pipeline
821829
try:
830+
print('begin add etl pipeline')
822831
from myapp.models.model_etl_pipeline import ETL_Pipeline
823832
tables = db.session.query(ETL_Pipeline).all()
824833
if len(tables) == 0:
@@ -840,6 +849,7 @@ def add_chat(chat_path):
840849

841850
# 添加nni超参搜索
842851
try:
852+
print('begin add nni')
843853
from myapp.models.model_nni import NNI
844854
nni = db.session.query(NNI).all()
845855
if len(nni) == 0:
@@ -862,14 +872,15 @@ def add_chat(chat_path):
862872
resource_gpu=nni.get('resource_gpu', '0'),
863873
))
864874
db.session.commit()
865-
print('添加etl pipeline成功')
875+
print('添加nni 超参搜索成功')
866876
except Exception as e:
867877
print(e)
868878
# traceback.print_exc()
869879

870880

871881
# 添加镜像在线构建
872882
try:
883+
print('begin add docker')
873884
from myapp.models.model_docker import Docker
874885
docker = db.session.query(Docker).all()
875886
if len(docker) == 0:

myapp/models/model_dataset.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,15 +70,37 @@ def url_html(self):
7070
html+='<a target=_blank href="%s">%s</a><br>'%(url.strip(),url.strip())
7171
return Markup('<div>%s</div>'%html)
7272

73+
def label_html(self):
    """Return the dataset label, linked to the first related URL when one exists.

    ``self.url`` holds one address per line; blank lines are ignored.

    Returns:
        Markup: an ``<a>`` element pointing at the first non-blank URL, with
        ``self.label`` as the link text.
        Otherwise ``self.label`` unchanged, when no URL is configured.
    """
    urls = [line.strip() for line in (self.url or '').split('\n') if line.strip()]
    if not urls:
        return self.label
    # Both fields are user-entered. Formatting through Markup's % operator
    # HTML-escapes the arguments, so a quote or tag in the label/URL cannot
    # break out of the attribute or inject markup into the list page.
    return Markup('<a target=_blank href="%s">%s</a>') % (urls[0], self.label)
80+
7381
@property
def path_html(self):
    """Render the dataset's local paths as HTML for list views.

    ``self.path`` holds one path per line. A path is rendered as a download
    link when its host-side location (``/mnt/`` replaced by
    ``/data/k8s/kubeflow/pipeline/workspace/``) exists and is a regular
    entry, or is a directory containing ``data.csv`` (the link then targets
    that file). Every other non-blank path is rendered as plain text.

    Returns:
        Markup: a ``<div>`` holding one ``<br>``-terminated entry per path.
    """
    entries = []
    # ``path`` may be unset on a dataset that only has download URLs; treat
    # that as "no paths" instead of failing on ``None.split``.
    for raw_path in (self.path or '').split('\n'):
        # Strip once up front so trailing spaces / "\r" never leak into the
        # filesystem check, the joined data.csv path or the generated URL.
        path = raw_path.strip()
        if not path:
            continue

        host_path = path.replace('/mnt/', '/data/k8s/kubeflow/pipeline/workspace/')
        downloadable = False
        if os.path.isdir(host_path):
            # A directory is only downloadable through its data.csv file.
            if os.path.exists(os.path.join(host_path, 'data.csv')):
                path = os.path.join(path, 'data.csv')
                downloadable = True
        elif os.path.exists(host_path):
            downloadable = True

        if downloadable:
            # request.host_url already ends with "/", and the path usually
            # starts with one; trim both so the URL has no "//" segments.
            static_path = path.replace('/data/k8s/kubeflow/', '').lstrip('/')
            download_url = request.host_url.rstrip('/') + '/static/' + static_path
            # Markup's % operator escapes the interpolated values.
            entries.append(Markup('<a target=_blank href="%s">%s</a><br>') % (download_url, path))
        else:
            entries.append(Markup('%s<br>') % (path,))

    return Markup('<div>%s</div>') % (Markup('').join(entries),)
83105

84106

myapp/models/model_notebook.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def name_url(self):
6868
# url= url + "#"+self.mount
6969

7070
# 对于有边缘节点,直接使用边缘集群的代理ip
71-
if SERVICE_EXTERNAL_IP:
71+
if SERVICE_EXTERNAL_IP and conf.get('ENABLE_EDGE_K8S',False):
7272
SERVICE_EXTERNAL_IP = SERVICE_EXTERNAL_IP.split('|')[-1].strip()
7373
from myapp.utils import core
7474
meet_ports = core.get_not_black_port(10000 + 10 * self.id)

myapp/models/model_train_model.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import json
2-
2+
import os.path
3+
import re
34
from flask_appbuilder import Model
45
from sqlalchemy.orm import relationship
56
from sqlalchemy import Text
@@ -14,7 +15,7 @@
1415
from flask import Markup
1516
metadata = Model.metadata
1617
conf = app.config
17-
18+
import pysnooper
1819

1920
class Training_Model(Model,AuditMixinNullable,MyappModelBase):
2021
__tablename__ = 'model'
@@ -59,11 +60,17 @@ def project_url(self):
5960

6061
@property
6162
def deploy(self):
62-
download_url = ''
63-
if self.path or self.download_url:
63+
download_url = f'{__("下载")} |'
64+
if self.download_url and self.download_url.strip():
6465
download_url = f'<a href="/training_model_modelview/api/download/{self.id}">{__("下载")}</a> |'
65-
else:
66-
download_url = f'{__("下载")} |'
66+
if self.path and self.path.strip():
67+
if re.match('^/mnt/', self.path):
68+
local_path = f'/data/k8s/kubeflow/pipeline/workspace/{self.path.strip().replace("/mnt/","")}'
69+
if os.path.exists(local_path):
70+
download_url = f'<a href="/training_model_modelview/api/download/{self.id}">{__("下载")}</a> |'
71+
if 'http://' in self.path or 'https://' in self.path:
72+
download_url = f'<a href="/training_model_modelview/api/download/{self.id}">{__("下载")}</a> |'
73+
6774
ops=download_url+f'''
6875
<a href="/training_model_modelview/api/deploy/{self.id}">{__("发布")}</a>
6976
'''

myapp/views/view_dataset.py

Lines changed: 43 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import datetime
22
import re
33
import shutil
4-
4+
import zipfile, pandas
55
from flask_appbuilder import action
66
from myapp.views.baseSQLA import MyappSQLAInterface as SQLAInterface
77
from wtforms.validators import DataRequired, Regexp
@@ -29,7 +29,7 @@
2929
from flask_appbuilder import expose
3030
from myapp.views.view_team import Project_Join_Filter, filter_join_org_project
3131
from myapp.models.model_dataset import Dataset
32-
32+
from myapp.utils import core
3333
conf = app.config
3434

3535

@@ -57,9 +57,7 @@ class Dataset_ModelView_base():
5757
order_columns = ['id']
5858
base_filters = [["id", Dataset_Filter, lambda: []]] # 设置权限过滤器
5959

60-
add_columns = ['name', 'version', 'label', 'describe', 'source_type', 'source', 'field',
61-
'usage', 'storage_class', 'file_type', 'url', 'download_url', 'path',
62-
'storage_size', 'entries_num', 'duration', 'price', 'status', 'icon', 'owner', 'features']
60+
add_columns = ['name', 'version', 'label', 'describe', 'url', 'download_url', 'path', 'icon', 'owner', 'features']
6361
show_columns = ['id', 'name', 'version', 'label', 'describe', 'segment', 'source_type', 'source',
6462
'industry', 'field', 'usage', 'storage_class', 'file_type', 'status', 'url',
6563
'path', 'download_url', 'storage_size', 'entries_num', 'duration', 'price', 'status', 'icon',
@@ -75,25 +73,26 @@ class Dataset_ModelView_base():
7573
"years": _("数据年份"),
7674
"url": _("相关网址"),
7775
"url_html": _("相关网址"),
76+
"label_html": _("中文名"),
7877
"path": _("本地路径"),
7978
"path_html": _("本地路径"),
8079
"entries_num": _("条目数量"),
8180
"duration": _("文件时长"),
8281
"price": _("价格"),
83-
"icon": _("示例图"),
84-
"icon_html": _("示例图"),
82+
"icon": _("预览图"),
83+
"icon_html": _("预览图"),
8584
"ops_html": _("操作"),
8685
"features": _("特征列"),
8786
"segment": _("分区")
8887
}
8988

9089
edit_columns = add_columns
91-
list_columns = ['icon_html', 'name', 'version', 'label', 'describe','owner', 'source_type', 'source', 'status',
92-
'field', 'url_html', 'download_url_html', 'usage', 'storage_class', 'file_type', 'path_html', 'storage_size', 'entries_num', 'price']
90+
list_columns = ['icon_html', 'name', 'version', 'label_html', 'describe','owner', 'ops_html', 'path_html', 'download_url_html']
9391

9492
cols_width = {
95-
"name": {"type": "ellip1", "width": 200},
93+
"name": {"type": "ellip1", "width": 150},
9694
"label": {"type": "ellip2", "width": 200},
95+
"label_html": {"type": "ellip2", "width": 200},
9796
"version": {"type": "ellip2", "width": 100},
9897
"describe": {"type": "ellip2", "width": 300},
9998
"field": {"type": "ellip1", "width": 100},
@@ -118,33 +117,30 @@ class Dataset_ModelView_base():
118117
"ops_html": {"type": "ellip1", "width": 200},
119118
}
120119
features_demo = '''
120+
填写规则
121121
{
122122
"column1": {
123123
# feature type
124-
"type": "dict,list,tuple,Value,Sequence,Array2D,Array3D,Array4D,Array5D,Translation,TranslationVariableLanguages,Audio,Image,Video,ClassLabel",
124+
"_type": "dict,list,tuple,Value,Sequence,Array2D,Array3D,Array4D,Array5D,Translation,TranslationVariableLanguages,Audio,Image,Video",
125125
126126
# data type in dict,list,tuple,Value,Sequence,Array2D,Array3D,Array4D,Array5D
127127
"dtype": "null,bool,int8,int16,int32,int64,uint8,uint16,uint32,uint64,float16,float32,float64,time32[(s|ms)],time64[(us|ns)],timestamp[(s|ms|us|ns)],timestamp[(s|ms|us|ns),tz=(tzstring)],date32,date64,duration[(s|ms|us|ns)],decimal128(precision,scale),decimal256(precision,scale),binary,large_binary,string,large_string"
128128
129-
# length of Sequence
130-
"length": 10
131-
132-
# dimension of Array2D,Array3D,Array4D,Array5D
133-
"shape": (1, 2, 3, 4, 5),
134-
135-
# sampling rate of Audio
136-
"sampling_rate":16000,
137-
"mono": true,
138-
"decode": true
139-
140-
# decode of Image
141-
"decode": true
142-
143-
# class of ClassLabel
144-
"num_classes":3,
145-
"names":['class1','class2','class3']
146-
147-
},
129+
}
130+
}
131+
示例:
132+
{
133+
"id": {
134+
"_type": "Value",
135+
"dtype": "string"
136+
},
137+
"image": {
138+
"_type": "Image"
139+
},
140+
"box": {
141+
"_type": "Value",
142+
"dtype": "string"
143+
}
148144
}
149145
'''
150146
add_form_extra_fields = {
@@ -160,7 +156,7 @@ class Dataset_ModelView_base():
160156
description= _('数据集版本'),
161157
default='latest',
162158
widget=BS3TextFieldWidget(),
163-
validators=[DataRequired(), Regexp("^[a-z][a-z0-9_\-]*[a-z0-9]$"), ]
159+
validators=[DataRequired(), Regexp("[a-z0-9_\-]*"), ]
164160
),
165161
"subdataset": StringField(
166162
label= _('子数据集'),
@@ -255,19 +251,26 @@ class Dataset_ModelView_base():
255251
),
256252
"path": StringField(
257253
label= _('本地路径'),
258-
description='',
254+
description='本地文件通过notebook上传到平台内,处理后,压缩成单个压缩文件,每行一个压缩文件地址',
259255
widget=MyBS3TextAreaFieldWidget(rows=3),
260256
default=''
261257
),
262258
"download_url": StringField(
263259
label= _('下载地址'),
264-
description='',
260+
description='可以直接下载的链接地址,每行一个url',
265261
widget=MyBS3TextAreaFieldWidget(rows=3),
266262
default=''
267263
),
264+
"icon": StringField(
265+
label=_('预览图'),
266+
default='',
267+
description=_('可以为图片地址,svg源码,或者帮助文档链接'),
268+
widget=BS3TextFieldWidget(),
269+
validators=[]
270+
),
268271
"features": StringField(
269272
label= _('特征列'),
270-
description= _('数据集中的列信息'),
273+
description= _('数据集中的列信息,要求数据集中要有data.csv文件用于表示数据集中的全部数据'),
271274
widget=MyBS3TextAreaFieldWidget(rows=3, tips=Markup('<pre><code>' + features_demo + "</code></pre>")),
272275
default=''
273276
)
@@ -280,13 +283,14 @@ class Dataset_ModelView_base():
280283
def pre_add(self, item):
    """Normalise a dataset record before it is saved.

    Applied defaults and clean-ups:
      * ``owner`` defaults to the current user followed by ``",*"``.
      * An inline SVG ``icon`` gets the width/height of its ``<svg>`` tag
        forced to ``50px``.
      * ``version`` defaults to ``'latest'``.
      * ``subdataset`` defaults to the dataset name.
      * ``features`` is re-serialised as indented JSON, or ``"{}"`` when
        empty or blank.

    Args:
        item: the dataset row being added (mutated in place).

    Raises:
        json.JSONDecodeError: if ``item.features`` is non-blank but not valid JSON.
    """

    def _shrink_svg_tag(match):
        # Only touch the opening <svg ...> tag: rewriting every width/height
        # in the document would resize inner shapes, and the lookbehind keeps
        # attributes such as stroke-width="2" from being matched by their tail.
        tag = match.group(0)
        for attr in ('width', 'height'):
            tag = re.sub(r'(?<![\w-])%s="\d+(\.\d+)?(px)?"' % attr, '%s="50px"' % attr, tag)
        return tag

    if not item.owner:
        item.owner = g.user.username + ",*"
    if item.icon and '</svg>' in item.icon:
        item.icon = re.sub(r'<svg\b[^>]*>', _shrink_svg_tag, item.icon)
    if not item.version:
        item.version = 'latest'
    if not item.subdataset:
        item.subdataset = item.name

    # A whitespace-only value is "no features", not malformed JSON.
    features_text = (item.features or '').strip()
    if features_text:
        item.features = json.dumps(json.loads(features_text), indent=4, ensure_ascii=False)
    else:
        item.features = "{}"
290294
def pre_update(self, item):
291295
self.pre_add(item)
292296

@@ -405,15 +409,15 @@ def path2url(path):
405409
dataset = db.session.query(Dataset).filter_by(id=int(dataset_id)).first()
406410
try:
407411
download_url = []
408-
if dataset.path:
412+
if dataset.path and dataset.path.strip():
409413
# 如果存储在集群数据集中心
410414
# 如果存储在个人目录
411415
paths = dataset.path.split('\n')
412416
for path in paths:
413417
download_url.append(path2url(path))
414418

415419
# 如果存储在外部链接
416-
elif dataset.download_url:
420+
elif dataset.download_url and dataset.download_url.strip():
417421
download_url = dataset.download_url.split('\n')
418422
else:
419423
# 如果存储在对象存储中

0 commit comments

Comments
 (0)