-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathoptimal_lr.py
85 lines (83 loc) · 5.15 KB
/
optimal_lr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# Nested lookup table: outer key -> inner key -> float.
# Judging by the variable name, the floats are learning rates; the outer keys
# look like algorithm/replay-variant identifiers and the inner keys like
# environment ids -- confirm against the code that consumes this table.
# NOTE: 'cliffworld' is only present under 'dqn'; every other outer key has
# the same 19 inner keys, in the same order.
optimal_lr_dict = {
    "dqn": {
        # Sokoban
        "Sokoban-Push_5x5_1_120": 1e-3,
        "Sokoban-Push_5x5_2_120": 1e-3,
        "Sokoban-Push_6x6_1_120": 1e-4,
        "Sokoban-Push_6x6_2_120": 1e-4,
        "Sokoban-Push_6x6_3_120": 3e-4,
        "Sokoban-Push_7x7_1_120": 1e-4,
        "Sokoban-Push_7x7_2_120": 1e-4,
        # MiniGrid
        "MiniGrid-DoorKey-6x6-v0": 3e-4,
        "MiniGrid-Unlock-v0": 3e-4,
        "MiniGrid-RedBlueDoors-6x6-v0": 3e-4,
        "MiniGrid-SimpleCrossingS9N1-v0": 1e-4,
        "MiniGrid-SimpleCrossingS9N2-v0": 3e-4,
        "MiniGrid-LavaCrossingS9N1-v0": 3e-4,
        "MiniGrid-LavaCrossingS9N2-v0": 1e-4,
        # MinAtar
        "MinAtar/Asterix-v0": 3e-5,
        "MinAtar/Breakout-v0": 3e-4,
        "MinAtar/Freeway-v0": 3e-4,
        "MinAtar/Seaquest-v0": 1e-4,
        "MinAtar/SpaceInvaders-v0": 3e-4,
        # Other
        "cliffworld": 1e-4,
    },
    "DDQN-EBU": {
        # Sokoban
        "Sokoban-Push_5x5_1_120": 3e-5,
        "Sokoban-Push_5x5_2_120": 1e-4,
        "Sokoban-Push_6x6_1_120": 1e-5,
        "Sokoban-Push_6x6_2_120": 1e-5,
        "Sokoban-Push_6x6_3_120": 1e-5,
        "Sokoban-Push_7x7_1_120": 1e-5,
        "Sokoban-Push_7x7_2_120": 1e-5,
        # MiniGrid
        "MiniGrid-DoorKey-6x6-v0": 1e-5,
        "MiniGrid-Unlock-v0": 1e-5,
        "MiniGrid-RedBlueDoors-6x6-v0": 1e-5,
        "MiniGrid-SimpleCrossingS9N1-v0": 1e-4,
        "MiniGrid-SimpleCrossingS9N2-v0": 1e-4,
        "MiniGrid-LavaCrossingS9N1-v0": 1e-4,
        "MiniGrid-LavaCrossingS9N2-v0": 1e-5,
        # MinAtar
        "MinAtar/Asterix-v0": 3e-5,
        "MinAtar/Breakout-v0": 3e-4,
        "MinAtar/Freeway-v0": 1e-4,
        "MinAtar/Seaquest-v0": 1e-4,
        "MinAtar/SpaceInvaders-v0": 1e-4,
    },
    "DISCOR-DDQN-UER": {
        # Sokoban
        "Sokoban-Push_5x5_1_120": 3e-4,
        "Sokoban-Push_5x5_2_120": 1e-4,
        "Sokoban-Push_6x6_1_120": 3e-4,
        "Sokoban-Push_6x6_2_120": 1e-4,
        "Sokoban-Push_6x6_3_120": 3e-4,
        "Sokoban-Push_7x7_1_120": 1e-4,
        "Sokoban-Push_7x7_2_120": 1e-4,
        # MiniGrid
        "MiniGrid-DoorKey-6x6-v0": 1e-4,
        "MiniGrid-Unlock-v0": 3e-5,
        "MiniGrid-RedBlueDoors-6x6-v0": 3e-4,
        "MiniGrid-SimpleCrossingS9N1-v0": 1e-4,
        "MiniGrid-SimpleCrossingS9N2-v0": 3e-4,
        "MiniGrid-LavaCrossingS9N1-v0": 1e-4,
        "MiniGrid-LavaCrossingS9N2-v0": 1e-4,
        # MinAtar
        "MinAtar/Asterix-v0": 3e-5,
        "MinAtar/Breakout-v0": 1e-3,
        "MinAtar/Freeway-v0": 1e-4,
        "MinAtar/Seaquest-v0": 1e-4,
        "MinAtar/SpaceInvaders-v0": 3e-4,
    },
    "DDQN-UER": {
        # Sokoban
        "Sokoban-Push_5x5_1_120": 3e-4,
        "Sokoban-Push_5x5_2_120": 1e-4,
        "Sokoban-Push_6x6_1_120": 3e-4,
        "Sokoban-Push_6x6_2_120": 1e-4,
        "Sokoban-Push_6x6_3_120": 1e-4,
        "Sokoban-Push_7x7_1_120": 1e-5,
        "Sokoban-Push_7x7_2_120": 3e-4,
        # MiniGrid
        "MiniGrid-DoorKey-6x6-v0": 3e-4,
        "MiniGrid-Unlock-v0": 1e-4,
        "MiniGrid-RedBlueDoors-6x6-v0": 3e-4,
        "MiniGrid-SimpleCrossingS9N1-v0": 1e-4,
        "MiniGrid-SimpleCrossingS9N2-v0": 1e-4,
        "MiniGrid-LavaCrossingS9N1-v0": 1e-4,
        "MiniGrid-LavaCrossingS9N2-v0": 1e-4,
        # MinAtar
        "MinAtar/Asterix-v0": 1e-4,
        "MinAtar/Breakout-v0": 3e-4,
        "MinAtar/Freeway-v0": 1e-4,
        "MinAtar/Seaquest-v0": 1e-4,
        "MinAtar/SpaceInvaders-v0": 3e-4,
    },
    "DDQN-PER": {
        # Sokoban
        "Sokoban-Push_5x5_1_120": 3e-4,
        "Sokoban-Push_5x5_2_120": 3e-4,
        "Sokoban-Push_6x6_1_120": 3e-4,
        "Sokoban-Push_6x6_2_120": 3e-5,
        "Sokoban-Push_6x6_3_120": 1e-4,
        "Sokoban-Push_7x7_1_120": 1e-4,
        "Sokoban-Push_7x7_2_120": 1e-4,
        # MiniGrid
        "MiniGrid-DoorKey-6x6-v0": 1e-4,
        "MiniGrid-Unlock-v0": 1e-4,
        "MiniGrid-RedBlueDoors-6x6-v0": 1e-4,
        "MiniGrid-SimpleCrossingS9N1-v0": 1e-4,
        "MiniGrid-SimpleCrossingS9N2-v0": 3e-4,
        "MiniGrid-LavaCrossingS9N1-v0": 1e-4,
        "MiniGrid-LavaCrossingS9N2-v0": 1e-4,
        # MinAtar
        "MinAtar/Asterix-v0": 3e-5,
        "MinAtar/Breakout-v0": 3e-4,
        "MinAtar/Freeway-v0": 3e-5,
        "MinAtar/Seaquest-v0": 1e-4,
        "MinAtar/SpaceInvaders-v0": 1e-4,
    },
    "DDQN-TER": {
        # Sokoban
        "Sokoban-Push_5x5_1_120": 1e-4,
        "Sokoban-Push_5x5_2_120": 1e-3,
        "Sokoban-Push_6x6_1_120": 3e-4,
        "Sokoban-Push_6x6_2_120": 1e-4,
        "Sokoban-Push_6x6_3_120": 3e-4,
        "Sokoban-Push_7x7_1_120": 3e-4,
        "Sokoban-Push_7x7_2_120": 1e-4,
        # MiniGrid
        "MiniGrid-DoorKey-6x6-v0": 3e-4,
        "MiniGrid-Unlock-v0": 3e-4,
        "MiniGrid-RedBlueDoors-6x6-v0": 3e-4,
        "MiniGrid-SimpleCrossingS9N1-v0": 3e-4,
        "MiniGrid-SimpleCrossingS9N2-v0": 3e-4,
        "MiniGrid-LavaCrossingS9N1-v0": 3e-4,
        "MiniGrid-LavaCrossingS9N2-v0": 3e-4,
        # MinAtar
        "MinAtar/Asterix-v0": 3e-4,
        "MinAtar/Breakout-v0": 3e-4,
        "MinAtar/Freeway-v0": 3e-4,
        "MinAtar/Seaquest-v0": 3e-4,
        "MinAtar/SpaceInvaders-v0": 3e-4,
    },
}
# Per-environment parameter triples, keyed by environment id.
# Each value is a 3-element list laid out as:
#     [sample_method_para, policy_loss_para, tau]
optimal_para_kl_dict = {
    # Sokoban
    "Sokoban-Push_5x5_1_120": [0.8, 1.0, 0.1],
    "Sokoban-Push_6x6_1_120": [0.8, 2.0, 0.01],
    "Sokoban-Push_7x7_1_120": [0.5, 2.0, 0.01],
    "Sokoban-Push_6x6_3_120": [0.0, 2.0, 0.1],
    "Sokoban-Push_5x5_2_120": [0.8, 5.0, 0.01],
    "Sokoban-Push_6x6_2_120": [0.5, 5.0, 0.1],
    "Sokoban-Push_7x7_2_120": [0.8, 5.0, 0.01],
    # MiniGrid
    "MiniGrid-DoorKey-6x6-v0": [0.5, 2.0, 0.01],
    "MiniGrid-Unlock-v0": [0.5, 5.0, 0.01],
    "MiniGrid-RedBlueDoors-6x6-v0": [0.5, 2.0, 0.01],
    "MiniGrid-SimpleCrossingS9N1-v0": [0.8, 2.0, 0.01],
    "MiniGrid-SimpleCrossingS9N2-v0": [0.2, 5.0, 0.001],
    "MiniGrid-LavaCrossingS9N1-v0": [0.5, 2.0, 0.01],
    "MiniGrid-LavaCrossingS9N2-v0": [0.2, 5.0, 0.01],
    # MinAtar
    "MinAtar/Asterix-v0": [0.0, 0.01, 0.1],
    "MinAtar/Breakout-v0": [0.5, 2.0, 0.01],
    "MinAtar/Freeway-v0": [0.0, 0.1, 0.01],
    "MinAtar/Seaquest-v0": [0.0, 0.01, 1.0],
    "MinAtar/SpaceInvaders-v0": [0.0, 2.0, 1.0],
    # Other
    "PongNoFrameskip-v4": [0.2, 5.0, 1.0],
}