|
22 | 22 | from alf.examples.benchmarks.dm_control import dmc_conf
|
23 | 23 | from alf.optimizers import Adam
|
24 | 24 |
|
25 |
| -actor_hidden_layers = (256, 256) |
26 |
| -joint_hidden_layers = (256, 256) |
27 |
| -# actor_hidden_layers = (32, 32) |
28 |
| -# joint_hidden_layers = (32, 32) |
| 25 | +debug_mode = False |
29 | 26 | optimizer = Adam(lr=5e-4)
|
30 | 27 | use_obs_normalizer = True
|
31 | 28 | obs_normalizer_clipping = False
|
32 | 29 |
|
| 30 | +if debug_mode: |
| 31 | + actor_hidden_layers = (32, 32) |
| 32 | + joint_hidden_layers = (32, 32) |
| 33 | + num_actor_eval_samples = 64 |
| 34 | +else: |
| 35 | + actor_hidden_layers = (256, 256) |
| 36 | + joint_hidden_layers = (256, 256) |
| 37 | + num_actor_eval_samples = 512 |
| 38 | + |
33 | 39 | if use_obs_normalizer:
|
34 | 40 | data_transformer_ctor = ObservationNormalizer
|
35 | 41 | else:
|
|
56 | 62 | actor_network_cls=actor_network_cls,
|
57 | 63 | critic_network_cls=critic_network_cls,
|
58 | 64 | num_actors=10,
|
59 |
| - use_target_actor=False, |
60 | 65 | use_bootstrap_actors=True,
|
61 | 66 | bootstrap_mask_prob=0.8,
|
62 |
| - # num_actor_eval_samples=512, |
63 |
| - num_actor_eval_samples=64, |
| 67 | + num_actor_eval_samples=num_actor_eval_samples, |
64 | 68 | eval_samples_init_method='normal',
|
65 | 69 | eval_samples_clipping=obs_normalizer_clipping,
|
66 |
| - actor_eval_type='last', |
67 |
| - actor_encoding_dim=256, |
| 70 | + actor_eval_type='exclude_input', |
| 71 | + actor_encoding_dim=None, |
68 | 72 | obs_action_encoding_dim=128,
|
69 | 73 | actor_utd=1,
|
70 |
| - critic_utd=5, |
| 74 | + critic_utd=2, |
| 75 | + critic_respect_exp_batch_size=True, |
71 | 76 | target_critic_tau=0.005,
|
72 | 77 | target_critic_period=1,
|
73 |
| - target_critic_use_ema=False, |
74 |
| - target_actor_tau=0.05, |
75 |
| - target_actor_period=1, |
76 |
| - target_actor_use_ema=False) |
| 78 | + target_critic_use_ema=False) |
77 | 79 |
|
78 | 80 | alf.config(
|
79 | 81 | 'TransformerEncoder',
|
|
0 commit comments