Skip to content

Commit 8a967a3

Browse files
authored
Pzmm module (#67)
* Initial commit of adding pzmm without any adjustments. * Adjust setup.py to include required pzmm packages. * Include README file. * Add demo video to README. * Add link to video with image link. * Adjust image size on readme * <As before> * Update README.md * Update __init__.py Remove version, name, and dev built-in values for pzmm * Adjust README file for PZMM's new location as a module of sasctl instead of a standalone package. * SAS Acrolinx check and adjustments. * Spell check. * Better formatting of README file.
1 parent 0c4e35d commit 8a967a3

File tree

10 files changed

+1893
-0
lines changed

10 files changed

+1893
-0
lines changed

setup.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ def get_file(filename):
4343
package_dir={"": "src"},
4444
python_requires='>=2.7',
4545
install_requires=[
46+
'pandas',
47+
'scikit-learn',
4648
'requests',
4749
'six >= 1.11',
4850
'futures ; python_version <= "2.7"'

src/sasctl/pzmm/README.md

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# PZMM Module
2+
3+
## Overview
4+
5+
The goal of the PZMM (Python Zip Model Management) module is to allow users of SAS Model Manager on SAS Viya (MM) and SAS Open Model Manager (OMM) to zip through the process of importing Python models into MM and OMM. To facilitate model imports, the module performs the following tasks:
6+
7+
* Writes JSON files to read in the model information, which includes the following files:
8+
* `fileMetadata.json` specifies the file roles for the names of the input and output variables files, the Python score code file, and the Python pickle file
9+
* `ModelProperties.json` is used to set the model properties that are read during the import process
10+
* `inputVar.json` and `outputVar.json` are used to set the input and output variables of the model
11+
* `dmcas_fitstat.json` is an optional file that provides the fit statistics that are associated with the imported model, which are either user-generated or data-generated
12+
* `dmcas_lift.json` and `dmcas_roc.json` are optional files that provide the Lift and ROC plots that are associated with the imported model, which are data-generated
13+
* Writes the `*score.py` file that is used for model scoring in MM & OMM
14+
* Serializes a trained model into a binary pickle file
15+
* Archives all relevant model files into a ZIP file and imports the model using REST API calls
16+
17+
## Prerequisites
18+
19+
Use of this package requires the following:
20+
21+
* Python version 3+
22+
* Automatic generation of score code is limited to Python >= 3.6 (this functionality will be backported to Python 3+ in future releases)
23+
* SAS Viya 3.5+ environment or SAS Open Model Manager 1.2+ and user credentials
24+
* External Python libraries:
25+
* scipy v1.4.0+
26+
* scikit-learn v0.22.1+
27+
* pandas v0.25.3+
28+
* requests v2.23.0+
29+
30+
## Module Import
31+
32+
After installing the python-sasctl package, import the PZMM module by running the following line in Python:
33+
34+
`import sasctl.pzmm as pzmm`
35+
36+
## Demos
37+
38+
The following demo video walks through the process of importing a Python model into SAS Model Manager on SAS Viya and shows most of the current features of PZMM. (Note that for this release, the module import statement needs to be `import sasctl.pzmm as pzmm` instead of `import pzmm`.)
39+
40+
[<img src="pzmmintro.jpg" alt="drawing" width="600"/>](https://players.brightcove.net/3665946608001/default_default/index.html?videoId=6164663310001)
41+
42+
## License
43+
44+
This project is licensed under the [Apache 2.0 License](/LICENSE).
45+
46+

src/sasctl/pzmm/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Copyright (c) 2020, SAS Institute Inc., Cary, NC, USA. All Rights Reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
from .pickleModel import PickleModel
5+
from .uploadData import ModelImport
6+
from .writeJSONFiles import JSONFiles
7+
from .zipModel import ZipModel
8+
from .writeScoreCode import ScoreCode
Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
{
2+
"creationTimeStamp" : null,
3+
"modifiedTimeStamp" : null,
4+
"createdBy" : null,
5+
"modifiedBy" : null,
6+
"id" : null,
7+
"name" : "dmcas_fitstat",
8+
"description" : null,
9+
"revision" : 0,
10+
"groupName" : null,
11+
"order" : 0,
12+
"type" : null,
13+
"parameterMap" : {
14+
"_RASE_" : {
15+
"parameter" : "_RASE_",
16+
"type" : "num",
17+
"label" : "Root Average Squared Error",
18+
"length" : 8,
19+
"order" : 7,
20+
"values" : [ "_RASE_" ],
21+
"preformatted" : false
22+
},
23+
"_NObs_" : {
24+
"parameter" : "_NObs_",
25+
"type" : "num",
26+
"label" : "Sum of Frequencies",
27+
"length" : 8,
28+
"order" : 4,
29+
"values" : [ "_NObs_" ],
30+
"preformatted" : false
31+
},
32+
"_GINI_" : {
33+
"parameter" : "_GINI_",
34+
"type" : "num",
35+
"label" : "Gini Coefficient",
36+
"length" : 8,
37+
"order" : 12,
38+
"values" : [ "_GINI_" ],
39+
"preformatted" : false
40+
},
41+
"_GAMMA_" : {
42+
"parameter" : "_GAMMA_",
43+
"type" : "num",
44+
"label" : "Gamma",
45+
"length" : 8,
46+
"order" : 13,
47+
"values" : [ "_GAMMA_" ],
48+
"preformatted" : false
49+
},
50+
"_formattedPartition_" : {
51+
"parameter" : "_formattedPartition_",
52+
"type" : "char",
53+
"label" : "Formatted Partition",
54+
"length" : 12,
55+
"order" : 3,
56+
"values" : [ "_formattedPartition_" ],
57+
"preformatted" : false
58+
},
59+
"_DataRole_" : {
60+
"parameter" : "_DataRole_",
61+
"type" : "char",
62+
"label" : "Data Role",
63+
"length" : 10,
64+
"order" : 1,
65+
"values" : [ "_DataRole_" ],
66+
"preformatted" : false
67+
},
68+
"_MCE_" : {
69+
"parameter" : "_MCE_",
70+
"type" : "num",
71+
"label" : "Misclassification Rate",
72+
"length" : 8,
73+
"order" : 8,
74+
"values" : [ "_MCE_" ],
75+
"preformatted" : false
76+
},
77+
"_ASE_" : {
78+
"parameter" : "_ASE_",
79+
"type" : "num",
80+
"label" : "Average Squared Error",
81+
"length" : 8,
82+
"order" : 5,
83+
"values" : [ "_ASE_" ],
84+
"preformatted" : false
85+
},
86+
"_MCLL_" : {
87+
"parameter" : "_MCLL_",
88+
"type" : "num",
89+
"label" : "Multi-Class Log Loss",
90+
"length" : 8,
91+
"order" : 9,
92+
"values" : [ "_MCLL_" ],
93+
"preformatted" : false
94+
},
95+
"_KS_" : {
96+
"parameter" : "_KS_",
97+
"type" : "num",
98+
"label" : "KS (Youden)",
99+
"length" : 8,
100+
"order" : 10,
101+
"values" : [ "_KS_" ],
102+
"preformatted" : false
103+
},
104+
"_KSPostCutoff_" : {
105+
"parameter" : "_KSPostCutoff_",
106+
"type" : "num",
107+
"label" : "ROC Separation",
108+
"length" : 8,
109+
"order" : 16,
110+
"values" : [ "_KSPostCutoff_" ],
111+
"preformatted" : false
112+
},
113+
"_DIV_" : {
114+
"parameter" : "_DIV_",
115+
"type" : "num",
116+
"label" : "Divisor for ASE",
117+
"length" : 8,
118+
"order" : 6,
119+
"values" : [ "_DIV_" ],
120+
"preformatted" : false
121+
},
122+
"_TAU_" : {
123+
"parameter" : "_TAU_",
124+
"type" : "num",
125+
"label" : "Tau",
126+
"length" : 8,
127+
"order" : 14,
128+
"values" : [ "_TAU_" ],
129+
"preformatted" : false
130+
},
131+
"_KSCut_" : {
132+
"parameter" : "_KSCut_",
133+
"type" : "num",
134+
"label" : "KS Cutoff",
135+
"length" : 8,
136+
"order" : 15,
137+
"values" : [ "_KSCut_" ],
138+
"preformatted" : false
139+
},
140+
"_C_" : {
141+
"parameter" : "_C_",
142+
"type" : "num",
143+
"label" : "Area Under ROC",
144+
"length" : 8,
145+
"order" : 11,
146+
"values" : [ "_C_" ],
147+
"preformatted" : false
148+
},
149+
"_PartInd_" : {
150+
"parameter" : "_PartInd_",
151+
"type" : "num",
152+
"label" : "Partition Indicator",
153+
"length" : 8,
154+
"order" : 2,
155+
"values" : [ "_PartInd_" ],
156+
"preformatted" : false
157+
}
158+
},
159+
"data" : [ {
160+
"dataMap" : {
161+
"_RASE_" : null,
162+
"_NObs_" : null,
163+
"_GINI_" : null,
164+
"_GAMMA_" : null,
165+
"_formattedPartition_" : " 0",
166+
"_DataRole_" : "VALIDATE",
167+
"_MCE_" : null,
168+
"_ASE_" : null,
169+
"_MCLL_" : null,
170+
"_KS_" : null,
171+
"_KSPostCutoff_" : null,
172+
"_DIV_" : null,
173+
"_TAU_" : null,
174+
"_KSCut_" : null,
175+
"_C_" : null,
176+
"_PartInd_" : null
177+
},
178+
"rowNumber" : 1,
179+
"header" : null
180+
}, {
181+
"dataMap" : {
182+
"_RASE_" : null,
183+
"_NObs_" : null,
184+
"_GINI_" : null,
185+
"_GAMMA_" : null,
186+
"_formattedPartition_" : " 1",
187+
"_DataRole_" : "TRAIN",
188+
"_MCE_" : null,
189+
"_ASE_" : null,
190+
"_MCLL_" : null,
191+
"_KS_" : null,
192+
"_KSPostCutoff_" : null,
193+
"_DIV_" : null,
194+
"_TAU_" : null,
195+
"_KSCut_" : null,
196+
"_C_" : null,
197+
"_PartInd_" : null
198+
},
199+
"rowNumber" : 2,
200+
"header" : null
201+
}, {
202+
"dataMap" : {
203+
"_RASE_" : null,
204+
"_NObs_" : null,
205+
"_GINI_" : null,
206+
"_GAMMA_" : null,
207+
"_formattedPartition_" : " 2",
208+
"_DataRole_" : "TEST",
209+
"_MCE_" : null,
210+
"_ASE_" : null,
211+
"_MCLL_" : null,
212+
"_KS_" : null,
213+
"_KSPostCutoff_" : null,
214+
"_DIV_" : null,
215+
"_TAU_" : null,
216+
"_KSCut_" : null,
217+
"_C_" : null,
218+
"_PartInd_" : null
219+
},
220+
"rowNumber" : 3,
221+
"header" : null
222+
} ],
223+
"version" : 1,
224+
"xInteger" : false,
225+
"yInteger" : false
226+
}

src/sasctl/pzmm/pickleModel.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Copyright (c) 2020, SAS Institute Inc., Cary, NC, USA. All Rights Reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
5+
# %%
6+
import os
7+
8+
import pickle
9+
10+
# %%
11+
class PickleModel():
    '''
    Serialize trained Python models to binary pickle files.
    '''

    def pickleTrainedModel(self, trainedModel, modelPrefix, pPath=None):
        '''
        Write a trained model to a binary pickle file.

        The output file is named ``modelPrefix + '.pickle'`` and is created
        inside the ``pPath`` directory. An existing file with the same name
        is overwritten.

        Parameters
        ---------------
        trainedModel
            User-defined trained model. It must be serializable by the
            standard ``pickle`` module.
        modelPrefix : string
            Variable name for the model to be displayed in SAS Open Model
            Manager (e.g. hmeqClassTree + [Score.py || .pickle]).
        pPath : string, optional
            Directory in which the pickle file is written. Default is the
            current working directory at the time of the call.

        Returns
        ---------------
        None
            The method's only effect is the '*.pickle' file written to disk,
            which contains the serialized trained model.
        '''

        # Resolve the default here rather than in the signature: a default of
        # os.getcwd() would be evaluated once, when the module is imported,
        # and would go stale if the process later changed directory.
        if pPath is None:
            pPath = os.getcwd()

        with open(os.path.join(pPath, modelPrefix + '.pickle'), 'wb') as pFile:
            pickle.dump(trainedModel, pFile)

src/sasctl/pzmm/pzmmintro.jpg

242 KB
Loading

0 commit comments

Comments
 (0)