1
1
import datetime
2
2
import re
3
3
import shutil
4
-
4
+ import zipfile , pandas
5
5
from flask_appbuilder import action
6
6
from myapp .views .baseSQLA import MyappSQLAInterface as SQLAInterface
7
7
from wtforms .validators import DataRequired , Regexp
29
29
from flask_appbuilder import expose
30
30
from myapp .views .view_team import Project_Join_Filter , filter_join_org_project
31
31
from myapp .models .model_dataset import Dataset
32
-
32
+ from myapp . utils import core
33
33
conf = app .config
34
34
35
35
@@ -57,9 +57,7 @@ class Dataset_ModelView_base():
57
57
order_columns = ['id' ]
58
58
base_filters = [["id" , Dataset_Filter , lambda : []]] # 设置权限过滤器
59
59
60
- add_columns = ['name' , 'version' , 'label' , 'describe' , 'source_type' , 'source' , 'field' ,
61
- 'usage' , 'storage_class' , 'file_type' , 'url' , 'download_url' , 'path' ,
62
- 'storage_size' , 'entries_num' , 'duration' , 'price' , 'status' , 'icon' , 'owner' , 'features' ]
60
+ add_columns = ['name' , 'version' , 'label' , 'describe' , 'url' , 'download_url' , 'path' , 'icon' , 'owner' , 'features' ]
63
61
show_columns = ['id' , 'name' , 'version' , 'label' , 'describe' , 'segment' , 'source_type' , 'source' ,
64
62
'industry' , 'field' , 'usage' , 'storage_class' , 'file_type' , 'status' , 'url' ,
65
63
'path' , 'download_url' , 'storage_size' , 'entries_num' , 'duration' , 'price' , 'status' , 'icon' ,
@@ -75,25 +73,26 @@ class Dataset_ModelView_base():
75
73
"years" : _ ("数据年份" ),
76
74
"url" : _ ("相关网址" ),
77
75
"url_html" : _ ("相关网址" ),
76
+ "label_html" : _ ("中文名" ),
78
77
"path" : _ ("本地路径" ),
79
78
"path_html" : _ ("本地路径" ),
80
79
"entries_num" : _ ("条目数量" ),
81
80
"duration" : _ ("文件时长" ),
82
81
"price" : _ ("价格" ),
83
- "icon" : _ ("示例图 " ),
84
- "icon_html" : _ ("示例图 " ),
82
+ "icon" : _ ("预览图 " ),
83
+ "icon_html" : _ ("预览图 " ),
85
84
"ops_html" : _ ("操作" ),
86
85
"features" : _ ("特征列" ),
87
86
"segment" : _ ("分区" )
88
87
}
89
88
90
89
edit_columns = add_columns
91
- list_columns = ['icon_html' , 'name' , 'version' , 'label' , 'describe' ,'owner' , 'source_type' , 'source' , 'status' ,
92
- 'field' , 'url_html' , 'download_url_html' , 'usage' , 'storage_class' , 'file_type' , 'path_html' , 'storage_size' , 'entries_num' , 'price' ]
90
+ list_columns = ['icon_html' , 'name' , 'version' , 'label_html' , 'describe' ,'owner' , 'ops_html' , 'path_html' , 'download_url_html' ]
93
91
94
92
cols_width = {
95
- "name" : {"type" : "ellip1" , "width" : 200 },
93
+ "name" : {"type" : "ellip1" , "width" : 150 },
96
94
"label" : {"type" : "ellip2" , "width" : 200 },
95
+ "label_html" : {"type" : "ellip2" , "width" : 200 },
97
96
"version" : {"type" : "ellip2" , "width" : 100 },
98
97
"describe" : {"type" : "ellip2" , "width" : 300 },
99
98
"field" : {"type" : "ellip1" , "width" : 100 },
@@ -118,33 +117,30 @@ class Dataset_ModelView_base():
118
117
"ops_html" : {"type" : "ellip1" , "width" : 200 },
119
118
}
120
119
features_demo = '''
120
+ 填写规则
121
121
{
122
122
"column1": {
123
123
# feature type
124
- "type ": "dict,list,tuple,Value,Sequence,Array2D,Array3D,Array4D,Array5D,Translation,TranslationVariableLanguages,Audio,Image,Video,ClassLabel ",
124
+ "_type ": "dict,list,tuple,Value,Sequence,Array2D,Array3D,Array4D,Array5D,Translation,TranslationVariableLanguages,Audio,Image,Video",
125
125
126
126
# data type in dict,list,tuple,Value,Sequence,Array2D,Array3D,Array4D,Array5D
127
127
"dtype": "null,bool,int8,int16,int32,int64,uint8,uint16,uint32,uint64,float16,float32,float64,time32[(s|ms)],time64[(us|ns)],timestamp[(s|ms|us|ns)],timestamp[(s|ms|us|ns),tz=(tzstring)],date32,date64,duration[(s|ms|us|ns)],decimal128(precision,scale),decimal256(precision,scale),binary,large_binary,string,large_string"
128
128
129
- # length of Sequence
130
- "length": 10
131
-
132
- # dimension of Array2D,Array3D,Array4D,Array5D
133
- "shape": (1, 2, 3, 4, 5),
134
-
135
- # sampling rate of Audio
136
- "sampling_rate":16000,
137
- "mono": true,
138
- "decode": true
139
-
140
- # decode of Image
141
- "decode": true
142
-
143
- # class of ClassLabel
144
- "num_classes":3,
145
- "names":['class1','class2','class3']
146
-
147
- },
129
+ }
130
+ }
131
+ 示例:
132
+ {
133
+ "id": {
134
+ "_type": "Value",
135
+ "dtype": "string"
136
+ },
137
+ "image": {
138
+ "_type": "Image"
139
+ },
140
+ "box": {
141
+ "_type": "Value",
142
+ "dtype": "string"
143
+ }
148
144
}
149
145
'''
150
146
add_form_extra_fields = {
@@ -160,7 +156,7 @@ class Dataset_ModelView_base():
160
156
description = _ ('数据集版本' ),
161
157
default = 'latest' ,
162
158
widget = BS3TextFieldWidget (),
163
- validators = [DataRequired (), Regexp ("^ [a-z][a- z0-9_\-]*[a-z0-9]$ " ), ]
159
+ validators = [DataRequired (), Regexp ("[a-z0-9_\-]*" ), ]
164
160
),
165
161
"subdataset" : StringField (
166
162
label = _ ('子数据集' ),
@@ -255,19 +251,26 @@ class Dataset_ModelView_base():
255
251
),
256
252
"path" : StringField (
257
253
label = _ ('本地路径' ),
258
- description = '' ,
254
+ description = '本地文件通过notebook上传到平台内,处理后,压缩成单个压缩文件,每行一个压缩文件地址 ' ,
259
255
widget = MyBS3TextAreaFieldWidget (rows = 3 ),
260
256
default = ''
261
257
),
262
258
"download_url" : StringField (
263
259
label = _ ('下载地址' ),
264
- description = '' ,
260
+ description = '可以直接下载的链接地址,每行一个url ' ,
265
261
widget = MyBS3TextAreaFieldWidget (rows = 3 ),
266
262
default = ''
267
263
),
264
+ "icon" : StringField (
265
+ label = _ ('预览图' ),
266
+ default = '' ,
267
+ description = _ ('可以为图片地址,svg源码,或者帮助文档链接' ),
268
+ widget = BS3TextFieldWidget (),
269
+ validators = []
270
+ ),
268
271
"features" : StringField (
269
272
label = _ ('特征列' ),
270
- description = _ ('数据集中的列信息' ),
273
+ description = _ ('数据集中的列信息,要求数据集中要有data.csv文件用于表示数据集中的全部数据 ' ),
271
274
widget = MyBS3TextAreaFieldWidget (rows = 3 , tips = Markup ('<pre><code>' + features_demo + "</code></pre>" )),
272
275
default = ''
273
276
)
@@ -280,13 +283,14 @@ class Dataset_ModelView_base():
280
283
def pre_add(self, item):
    """Normalize a dataset record before it is inserted.

    Fills in defaults and canonicalizes user-supplied fields in place:

    * ``owner``      -- defaults to ``"<current username>,*"`` when empty.
    * ``icon``       -- when it holds inline SVG source, the root ``<svg>``
      tag's ``width``/``height`` attributes are forced to ``50px``.
    * ``version``    -- defaults to ``'latest'`` when empty.
    * ``subdataset`` -- defaults to ``item.name`` when empty.
    * ``features``   -- re-serialized as indented JSON; empty or
      whitespace-only input becomes ``"{}"``.

    Args:
        item: The dataset model instance about to be saved; mutated in place.

    Raises:
        json.JSONDecodeError: If ``item.features`` is non-empty and is not
            valid JSON. The error is deliberately left to propagate.
    """
    if not item.owner:
        item.owner = g.user.username + ",*"

    if item.icon and '</svg>' in item.icon:
        def _shrink_root_tag(match):
            # Only the opening <svg ...> tag is rewritten. The lookbehind
            # keeps attributes such as stroke-width="2" from being mistaken
            # for width="2".
            tag = match.group(0)
            tag = re.sub(r'(?<![\w-])width="\d+(\.\d+)?(px)?"', 'width="50px"', tag)
            tag = re.sub(r'(?<![\w-])height="\d+(\.\d+)?(px)?"', 'height="50px"', tag)
            return tag

        # count=1: resize the outermost <svg> element only, so nested shapes
        # keep their own geometry and the picture is scaled, not distorted.
        item.icon = re.sub(r'<svg\b[^>]*>', _shrink_root_tag, item.icon, count=1)

    if not item.version:
        item.version = 'latest'
    if not item.subdataset:
        item.subdataset = item.name

    # A textarea that contains only blanks/newlines is treated as "no
    # features" instead of being handed to json.loads (which would raise).
    raw_features = (item.features or "").strip()
    if raw_features:
        item.features = json.dumps(json.loads(raw_features), indent=4, ensure_ascii=False)
    else:
        item.features = "{}"
290
294
def pre_update(self, item):
    """Run the same pre-save normalization on an edited item as on a new one.

    Args:
        item: The dataset model instance about to be updated; handed
            unchanged to ``pre_add``.
    """
    self.pre_add(item)
292
296
@@ -405,15 +409,15 @@ def path2url(path):
405
409
dataset = db .session .query (Dataset ).filter_by (id = int (dataset_id )).first ()
406
410
try :
407
411
download_url = []
408
- if dataset .path :
412
+ if dataset .path and dataset . path . strip () :
409
413
# 如果存储在集群数据集中心
410
414
# 如果存储在个人目录
411
415
paths = dataset .path .split ('\n ' )
412
416
for path in paths :
413
417
download_url .append (path2url (path ))
414
418
415
419
# 如果存储在外部链接
416
- elif dataset .download_url :
420
+ elif dataset .download_url and dataset . download_url . strip () :
417
421
download_url = dataset .download_url .split ('\n ' )
418
422
else :
419
423
# 如果存储在对象存储中
0 commit comments