@@ -1,6 +1,6 @@
 """Example model architectures for pypsps."""
 
-from typing import List
+from typing import List, Tuple
 
 
 import tensorflow as tf
@@ -31,7 +31,9 @@ def recommended_callbacks(monitor="val_loss") -> List[tf.keras.callbacks.Callback]:
 
 
 def _build_binary_continuous_causal_loss(
-    n_states: int, alpha: float = 1.0
+    n_states: int,
+    alpha: float,
+    df_penalty_l1: float,
 ) -> losses.CausalLoss:
     """Builds an example of binary treatment & continuous outcome causal loss."""
     psps_outcome_loss = losses.OutcomeLoss(
@@ -48,7 +50,7 @@ def _build_binary_continuous_causal_loss(
         alpha=alpha,
         outcome_loss_weight=1.0,
         predictive_states_regularizer=pypress.keras.regularizers.DegreesOfFreedom(
-            10.0, df=n_states - 1
+            l1=df_penalty_l1, df=n_states - 1
         ),
         reduction="sum_over_batch_size",
     )
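With this change the L1 strength of the DegreesOfFreedom regularizer is no longer hard-coded to 10.0 but supplied by the caller. A minimal sketch of a call under the new signature (the argument values below are illustrative, not from the commit):

    # Hypothetical values; the regularizer targets n_states - 1 effective
    # degrees of freedom for the predictive states.
    psps_causal_loss = _build_binary_continuous_causal_loss(
        n_states=5,
        alpha=1.0,
        df_penalty_l1=0.1,
    )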
@@ -60,6 +62,8 @@ def build_toy_model(
     n_features: int,
     compile: bool = True,
     alpha: float = 1.0,
+    df_penalty_l1: float = 1.0,
+    learning_rate: float = 0.01,
 ) -> tf.keras.Model:
     """Builds a pypsps toy model for binary treatment & continuous outcome.
 
@@ -72,6 +76,8 @@ def build_toy_model(
         n_features: number of (numeric) features to use as input.
         compile: if True, compiles pypsps model with the appropriate pypsps causal loss functions.
         alpha: propensity score penalty (by default alpha = 1., which corresponds to equal weight)
+        df_penalty_l1: l1 parameter for the degrees-of-freedom (DF) regularization.
+        learning_rate: learning rate of the optimizer.
 
     Returns:
         A tf.keras Model with the pypsps architecture (compiled model if `compile=True`).
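The two new arguments surface previously hard-coded constants. A sketch of a call to the updated builder (values are illustrative):

    model = build_toy_model(
        n_states=3,
        n_features=10,
        alpha=1.0,
        df_penalty_l1=1.0,   # was fixed at 10.0 inside the loss builder
        learning_rate=0.01,  # was hard-coded in model.compile()
    )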
@@ -141,11 +147,154 @@ def build_toy_model(
     if compile:
 
         psps_causal_loss = _build_binary_continuous_causal_loss(
-            n_states=n_states, alpha=alpha
+            n_states=n_states,
+            alpha=alpha,
+            df_penalty_l1=df_penalty_l1,
+        )
+        model.compile(
+            loss=psps_causal_loss,
+            optimizer=tfk.optimizers.Nadam(learning_rate=learning_rate),
+            metrics=[
+                metrics.PropensityScoreBinaryCrossentropy(),
+                metrics.PropensityScoreAUC(curve="PR"),
+                metrics.OutcomeMeanSquaredError(),
+            ],
+        )
+
+    return model
+
+
+def build_model_binary_normal(
+    n_states: int,
+    n_features: int,
+    predictive_state_hidden_layers: List[Tuple[int, str]],
+    outcome_hidden_layers: List[Tuple[int, str]],
+    loc_layer: Tuple[int, str] = None,
+    scale_layer: Tuple[int, str] = None,
+    compile: bool = True,
+    alpha: float = 1.0,
+    df_penalty_l1: float = 1.0,
+    learning_rate: float = 0.01,
+    dropout_rate: float = 0.2,
+) -> tf.keras.Model:
+    """Builds a configurable pypsps model for binary treatment & continuous outcome.
+
+    All pypsps keras layers can be used to build more complex causal model architectures
+    within a TensorFlow graph. The specific model structure here is only used
+    for proof-of-concept / demo purposes.
+
+    Args:
+        n_states: number of predictive states to use in the pypsps model.
+        n_features: number of (numeric) features to use as input.
+        predictive_state_hidden_layers: list of (units, activation) tuples for the
+            hidden layers of the predictive-state branch.
+        outcome_hidden_layers: list of (units, activation) tuples for the hidden
+            layers of the outcome branch.
+        loc_layer: (units, activation) tuple for the per-state location sub-layer;
+            required despite the None default.
+        scale_layer: optional (units, activation) tuple for the per-state scale
+            sub-layer; if None, a constant (bias-only) scale is estimated per state.
+        compile: if True, compiles pypsps model with the appropriate pypsps causal loss functions.
+        alpha: propensity score penalty (by default alpha = 1., which corresponds to equal weight)
+        df_penalty_l1: l1 parameter for the degrees-of-freedom (DF) regularization.
+        learning_rate: learning rate of the optimizer.
+        dropout_rate: dropout rate applied after each hidden Dense layer.
+
+    Returns:
+        A tf.keras Model with the pypsps architecture (compiled model if `compile=True`).
+    """
+
+    assert n_states >= 1, f"Got n_states={n_states}"
+    assert n_features >= 1, f"Got n_features={n_features}"
+
+    features = tfk.layers.Input(shape=(n_features,))
+    treat = tfk.layers.Input(shape=(1,))
+
+    features_bn = tfk.layers.BatchNormalization()(features)
+    feat_treat = tfk.layers.Concatenate(name="features_and_treatment")(
+        [features_bn, treat]
+    )
+
+    ps_hidden = tf.keras.layers.Dense(
+        predictive_state_hidden_layers[0][0], predictive_state_hidden_layers[0][1]
+    )(features_bn)
+    ps_hidden = tf.keras.layers.Dropout(dropout_rate)(ps_hidden)
+    ps_hidden = tf.keras.layers.BatchNormalization()(ps_hidden)
+
+    for units, act in predictive_state_hidden_layers[1:]:
+        ps_hidden = tf.keras.layers.Dense(units, act)(ps_hidden)
+        ps_hidden = tf.keras.layers.Dropout(dropout_rate)(ps_hidden)
+        ps_hidden = tf.keras.layers.BatchNormalization()(ps_hidden)
+
+    ps_hidden = tf.keras.layers.Concatenate()([ps_hidden, features_bn])
+    pss = pypress.keras.layers.PredictiveStateSimplex(
+        n_states=n_states, input_dim=n_features
+    )
+    pred_states = pss(ps_hidden)
+
+    # Propensity score for binary treatment (--> "sigmoid" activation).
+    prop_score = pypress.keras.layers.PredictiveStateMeans(
+        units=1, activation="sigmoid", name="propensity_score"
+    )(pred_states)
+
+    outcome_hidden = tf.keras.layers.Dense(
+        outcome_hidden_layers[0][0], outcome_hidden_layers[0][1]
+    )(feat_treat)
+    outcome_hidden = tf.keras.layers.Dropout(dropout_rate)(outcome_hidden)
+    outcome_hidden = tf.keras.layers.BatchNormalization()(outcome_hidden)
+
+    for units, act in outcome_hidden_layers[1:]:
+        outcome_hidden = tf.keras.layers.Dense(units, act)(outcome_hidden)
+        outcome_hidden = tf.keras.layers.Dropout(dropout_rate)(outcome_hidden)
+        outcome_hidden = tf.keras.layers.BatchNormalization()(outcome_hidden)
+
+    outcome_hidden = tf.keras.layers.Concatenate()([outcome_hidden, feat_treat])
+
+    loc_preds = []
+    scale_preds = []
+    # One outcome model per state.
+    for state_id in range(n_states):
+        loc_preds.append(
+            tfk.layers.Dense(1, name="loc_pred_state_" + str(state_id))(
+                tfk.layers.Dense(
+                    loc_layer[0],
+                    loc_layer[1],
+                    name="loc_feat_eng_state_" + str(state_id),
+                )(outcome_hidden)
+            )
+        )
+
+        if scale_layer is None:
+            # In this toy model use a constant scale estimate (BiasOnly); if needed
+            # change this to a scale parameter that changes as a function of inputs / hidden layers.
+            scale_preds.append(
+                tf.keras.activations.softplus(
+                    layers.BiasOnly(name="scale_logit_" + str(state_id))(feat_treat)
+                )
+            )
+        else:
+            scale_preds.append(
+                tfk.layers.Dense(
+                    1, activation="softplus", name="scale_pred_state_" + str(state_id)
+                )(
+                    tfk.layers.Dense(
+                        scale_layer[0],
+                        scale_layer[1],
+                        name="scale_feat_eng_state_" + str(state_id),
+                    )(outcome_hidden)
+                )
+            )
+
+    loc_comb = tfk.layers.Concatenate(name="loc_pred_combined")(loc_preds)
+    scale_comb = tfk.layers.Concatenate(name="scale_pred_combined")(scale_preds)
+
+    outputs_concat = tfk.layers.Concatenate(name="output_tensor")(
+        [loc_comb, scale_comb, pred_states, prop_score]
+    )
+
+    model = tfk.models.Model(inputs=[features, treat], outputs=outputs_concat)
+
+    if compile:
+
+        psps_causal_loss = _build_binary_continuous_causal_loss(
+            n_states=n_states,
+            alpha=alpha,
+            df_penalty_l1=df_penalty_l1,
         )
         model.compile(
             loss=psps_causal_loss,
-            optimizer=tfk.optimizers.Nadam(learning_rate=0.01),
+            optimizer=tfk.optimizers.Nadam(learning_rate=learning_rate),
             metrics=[
                 metrics.PropensityScoreBinaryCrossentropy(),
                 metrics.PropensityScoreAUC(curve="PR"),
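For orientation, a usage sketch of the new builder. The layer specs, the synthetic data, and the concatenated (outcome, treatment) training target below are illustrative assumptions about pypsps conventions, not part of this commit:

    import numpy as np

    model = build_model_binary_normal(
        n_states=4,
        n_features=8,
        predictive_state_hidden_layers=[(32, "relu"), (16, "relu")],
        outcome_hidden_layers=[(32, "relu"), (16, "relu")],
        loc_layer=(8, "relu"),  # required; the None default is not handled
        scale_layer=None,       # None -> constant (bias-only) scale per state
        alpha=1.0,
        df_penalty_l1=0.5,
        learning_rate=0.005,
        dropout_rate=0.2,
    )

    # Synthetic data: features, binary treatment, continuous outcome (assumed shapes).
    X = np.random.normal(size=(256, 8)).astype("float32")
    t = np.random.binomial(1, 0.5, size=(256, 1)).astype("float32")
    y = np.random.normal(size=(256, 1)).astype("float32")

    # Assumption: the pypsps causal loss reads outcome and treatment from a
    # concatenated target tensor, hence np.hstack([y, t]) as y_true.
    model.fit([X, t], np.hstack([y, t]), epochs=2, batch_size=64)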