Skip to content

Commit 1779241

Browse files
authored
[Fix] Removed the possibility to create an Obs from data on several replica (#258)
* [Fix] Removed the possibility to create an Obs from data on several replica

* [Fix] extended tests and corrected a small bug in the previous commit

---------

Co-authored-by: Simon Kuberski <[email protected]>
1 parent dd4f852 commit 1779241

File tree

6 files changed

+111
-61
lines changed

6 files changed

+111
-61
lines changed

pyerrors/input/dobs.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -529,7 +529,8 @@ def import_dobs_string(content, full_output=False, separator_insertion=True):
529529
deltas.append(repdeltas)
530530
idl.append(repidl)
531531

532-
res.append(Obs(deltas, obs_names, idl=idl))
532+
obsmeans = [np.average(deltas[j]) for j in range(len(deltas))]
533+
res.append(Obs([np.array(deltas[j]) - obsmeans[j] for j in range(len(obsmeans))], obs_names, idl=idl, means=obsmeans))
533534
res[-1]._value = mean[i]
534535
_check(len(e_names) == ne)
535536

pyerrors/input/json.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -133,10 +133,11 @@ def _nan_Obs_like(obs):
133133
names = []
134134
idl = []
135135
for key, value in obs.idl.items():
136-
samples.append([np.nan] * len(value))
136+
samples.append(np.array([np.nan] * len(value)))
137137
names.append(key)
138138
idl.append(value)
139-
my_obs = Obs(samples, names, idl)
139+
my_obs = Obs(samples, names, idl, means=[np.nan for n in names])
140+
my_obs._value = np.nan
140141
my_obs._covobs = obs._covobs
141142
for name in obs._covobs:
142143
my_obs.names.append(name)
@@ -331,7 +332,8 @@ def get_Obs_from_dict(o):
331332
cd = _gen_covobsd_from_cdatad(o.get('cdata', {}))
332333

333334
if od:
334-
ret = Obs([[ddi[0] + values[0] for ddi in di] for di in od['deltas']], od['names'], idl=od['idl'])
335+
r_offsets = [np.average([ddi[0] for ddi in di]) for di in od['deltas']]
336+
ret = Obs([np.array([ddi[0] for ddi in od['deltas'][i]]) - r_offsets[i] for i in range(len(od['deltas']))], od['names'], idl=od['idl'], means=[ro + values[0] for ro in r_offsets])
335337
ret._value = values[0]
336338
else:
337339
ret = Obs([], [], means=[])
@@ -356,7 +358,8 @@ def get_List_from_dict(o):
356358
taglist = o.get('tag', layout * [None])
357359
for i in range(layout):
358360
if od:
359-
ret.append(Obs([list(di[:, i] + values[i]) for di in od['deltas']], od['names'], idl=od['idl']))
361+
r_offsets = np.array([np.average(di[:, i]) for di in od['deltas']])
362+
ret.append(Obs([od['deltas'][j][:, i] - r_offsets[j] for j in range(len(od['deltas']))], od['names'], idl=od['idl'], means=[ro + values[i] for ro in r_offsets]))
360363
ret[-1]._value = values[i]
361364
else:
362365
ret.append(Obs([], [], means=[]))
@@ -383,7 +386,8 @@ def get_Array_from_dict(o):
383386
taglist = o.get('tag', N * [None])
384387
for i in range(N):
385388
if od:
386-
ret.append(Obs([di[:, i] + values[i] for di in od['deltas']], od['names'], idl=od['idl']))
389+
r_offsets = np.array([np.average(di[:, i]) for di in od['deltas']])
390+
ret.append(Obs([od['deltas'][j][:, i] - r_offsets[j] for j in range(len(od['deltas']))], od['names'], idl=od['idl'], means=[ro + values[i] for ro in r_offsets]))
387391
ret[-1]._value = values[i]
388392
else:
389393
ret.append(Obs([], [], means=[]))

pyerrors/obs.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ def __init__(self, samples, names, idl=None, **kwargs):
8282
raise ValueError('Names are not unique.')
8383
if not all(isinstance(x, str) for x in names):
8484
raise TypeError('All names have to be strings.')
85+
if len(set([o.split('|')[0] for o in names])) > 1:
86+
raise ValueError('Cannot initialize Obs based on multiple ensembles. Please average separate Obs from each ensemble.')
8587
else:
8688
if not isinstance(names[0], str):
8789
raise TypeError('All names have to be strings.')
@@ -1407,6 +1409,8 @@ def reweight(weight, obs, **kwargs):
14071409
raise ValueError('Error: Not possible to reweight an Obs that contains covobs!')
14081410
if not set(obs[i].names).issubset(weight.names):
14091411
raise ValueError('Error: Ensembles do not fit')
1412+
if len(obs[i].mc_names) > 1 or len(weight.mc_names) > 1:
1413+
raise ValueError('Error: Cannot reweight an Obs that contains multiple ensembles.')
14101414
for name in obs[i].names:
14111415
if not set(obs[i].idl[name]).issubset(weight.idl[name]):
14121416
raise ValueError('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
@@ -1442,9 +1446,12 @@ def correlate(obs_a, obs_b):
14421446
-----
14431447
Keep in mind to only correlate primary observables which have not been reweighted
14441448
yet. The reweighting has to be applied after correlating the observables.
1445-
Currently only works if ensembles are identical (this is not strictly necessary).
1449+
Only works if a single ensemble is present in the Obs.
1450+
Currently only works if ensemble content is identical (this is not strictly necessary).
14461451
"""
14471452

1453+
if len(obs_a.mc_names) > 1 or len(obs_b.mc_names) > 1:
1454+
raise ValueError('Error: Cannot correlate Obs that contain multiple ensembles.')
14481455
if sorted(obs_a.names) != sorted(obs_b.names):
14491456
raise ValueError(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
14501457
if len(obs_a.cov_names) or len(obs_b.cov_names):
@@ -1755,7 +1762,11 @@ def import_bootstrap(boots, name, random_numbers):
17551762

17561763

17571764
def merge_obs(list_of_obs):
1758-
"""Combine all observables in list_of_obs into one new observable
1765+
"""Combine all observables in list_of_obs into one new observable.
1766+
This allows to merge Obs that have been computed on multiple replica
1767+
of the same ensemble.
1768+
If you like to merge Obs that are based on several ensembles, please
1769+
average them yourself.
17591770
17601771
Parameters
17611772
----------

tests/json_io_test.py

Lines changed: 56 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def test_jsonio():
1212
o = pe.pseudo_Obs(1.0, .2, 'one')
1313
o2 = pe.pseudo_Obs(0.5, .1, 'two|r1')
1414
o3 = pe.pseudo_Obs(0.5, .1, 'two|r2')
15-
o4 = pe.merge_obs([o2, o3])
15+
o4 = pe.merge_obs([o2, o3, pe.pseudo_Obs(0.5, .1, 'two|r3', samples=3221)])
1616
otag = 'This has been merged!'
1717
o4.tag = otag
1818
do = o - .2 * o4
@@ -101,8 +101,8 @@ def test_json_string_reconstruction():
101101

102102

103103
def test_json_corr_io():
104-
my_list = [pe.Obs([np.random.normal(1.0, 0.1, 100)], ['ens1']) for o in range(8)]
105-
rw_list = pe.reweight(pe.Obs([np.random.normal(1.0, 0.1, 100)], ['ens1']), my_list)
104+
my_list = [pe.Obs([np.random.normal(1.0, 0.1, 100), np.random.normal(1.0, 0.1, 321)], ['ens1|r1', 'ens1|r2'], idl=[range(1, 201, 2), range(321)]) for o in range(8)]
105+
rw_list = pe.reweight(pe.Obs([np.random.normal(1.0, 0.1, 100), np.random.normal(1.0, 0.1, 321)], ['ens1|r1', 'ens1|r2'], idl=[range(1, 201, 2), range(321)]), my_list)
106106

107107
for obs_list in [my_list, rw_list]:
108108
for tag in [None, "test"]:
@@ -111,40 +111,51 @@ def test_json_corr_io():
111111
for corr_tag in [None, 'my_Corr_tag']:
112112
for prange in [None, [3, 6]]:
113113
for gap in [False, True]:
114-
my_corr = pe.Corr(obs_list, padding=[pad, pad], prange=prange)
115-
my_corr.tag = corr_tag
116-
if gap:
117-
my_corr.content[4] = None
118-
pe.input.json.dump_to_json(my_corr, 'corr')
119-
recover = pe.input.json.load_json('corr')
120-
os.remove('corr.json.gz')
121-
assert np.all([o.is_zero() for o in [x for x in (my_corr - recover) if x is not None]])
122-
for index, entry in enumerate(my_corr):
123-
if entry is None:
124-
assert recover[index] is None
125-
assert my_corr.tag == recover.tag
126-
assert my_corr.prange == recover.prange
127-
assert my_corr.reweighted == recover.reweighted
114+
for mult in [1., pe.cov_Obs([12.22, 1.21], [.212**2, .11**2], 'renorm')[0]]:
115+
my_corr = mult * pe.Corr(obs_list, padding=[pad, pad], prange=prange)
116+
my_corr.tag = corr_tag
117+
if gap:
118+
my_corr.content[4] = None
119+
pe.input.json.dump_to_json(my_corr, 'corr')
120+
recover = pe.input.json.load_json('corr')
121+
os.remove('corr.json.gz')
122+
assert np.all([o.is_zero() for o in [x for x in (my_corr - recover) if x is not None]])
123+
for index, entry in enumerate(my_corr):
124+
if entry is None:
125+
assert recover[index] is None
126+
assert my_corr.tag == recover.tag
127+
assert my_corr.prange == recover.prange
128+
assert my_corr.reweighted == recover.reweighted
128129

129130

130131
def test_json_corr_2d_io():
131-
obs_list = [np.array([[pe.pseudo_Obs(1.0 + i, 0.1 * i, 'test'), pe.pseudo_Obs(0.0, 0.1 * i, 'test')], [pe.pseudo_Obs(0.0, 0.1 * i, 'test'), pe.pseudo_Obs(1.0 + i, 0.1 * i, 'test')]]) for i in range(4)]
132+
obs_list = [np.array([
133+
[
134+
pe.merge_obs([pe.pseudo_Obs(1.0 + i, 0.1 * i, 'test|r2'), pe.pseudo_Obs(1.0 + i, 0.1 * i, 'test|r1', samples=321)]),
135+
pe.merge_obs([pe.pseudo_Obs(0.0, 0.1 * i, 'test|r2'), pe.pseudo_Obs(0.0, 0.1 * i, 'test|r1', samples=321)]),
136+
],
137+
[
138+
pe.merge_obs([pe.pseudo_Obs(0.0, 0.1 * i, 'test|r2'), pe.pseudo_Obs(0.0, 0.1 * i, 'test|r1', samples=321),]),
139+
pe.merge_obs([pe.pseudo_Obs(1.0 + i, 0.1 * i, 'test|r2'), pe.pseudo_Obs(1.0 + i, 0.1 * i, 'test|r1', samples=321)]),
140+
],
141+
]) for i in range(4)]
132142

133143
for tag in [None, "test"]:
134144
obs_list[3][0, 1].tag = tag
135145
for padding in [0, 1]:
136146
for prange in [None, [3, 6]]:
137-
my_corr = pe.Corr(obs_list, padding=[padding, padding], prange=prange)
138-
my_corr.tag = tag
139-
pe.input.json.dump_to_json(my_corr, 'corr')
140-
recover = pe.input.json.load_json('corr')
141-
os.remove('corr.json.gz')
142-
assert np.all([np.all([o.is_zero() for o in q]) for q in [x.ravel() for x in (my_corr - recover) if x is not None]])
143-
for index, entry in enumerate(my_corr):
144-
if entry is None:
145-
assert recover[index] is None
146-
assert my_corr.tag == recover.tag
147-
assert my_corr.prange == recover.prange
147+
for mult in [1., pe.cov_Obs([12.22, 1.21], [.212**2, .11**2], 'renorm')[0]]:
148+
my_corr = mult * pe.Corr(obs_list, padding=[padding, padding], prange=prange)
149+
my_corr.tag = tag
150+
pe.input.json.dump_to_json(my_corr, 'corr')
151+
recover = pe.input.json.load_json('corr')
152+
os.remove('corr.json.gz')
153+
assert np.all([np.all([o.is_zero() for o in q]) for q in [x.ravel() for x in (my_corr - recover) if x is not None]])
154+
for index, entry in enumerate(my_corr):
155+
if entry is None:
156+
assert recover[index] is None
157+
assert my_corr.tag == recover.tag
158+
assert my_corr.prange == recover.prange
148159

149160

150161
def test_json_dict_io():
@@ -211,6 +222,7 @@ def list_check_obs(l1, l2):
211222
'd': pe.pseudo_Obs(.01, .001, 'testd', samples=10) * pe.cov_Obs(1, .01, 'cov1'),
212223
'se': None,
213224
'sf': 1.2,
225+
'k': pe.cov_Obs(.1, .001**2, 'cov') * pe.merge_obs([pe.pseudo_Obs(1.0, 0.1, 'test|r2'), pe.pseudo_Obs(1.0, 0.1, 'test|r1', samples=321)]),
214226
}
215227
}
216228

@@ -314,7 +326,7 @@ def test_dobsio():
314326

315327
o2 = pe.pseudo_Obs(0.5, .1, 'two|r1')
316328
o3 = pe.pseudo_Obs(0.5, .1, 'two|r2')
317-
o4 = pe.merge_obs([o2, o3])
329+
o4 = pe.merge_obs([o2, o3, pe.pseudo_Obs(0.5, .1, 'two|r3', samples=3221)])
318330
otag = 'This has been merged!'
319331
o4.tag = otag
320332
do = o - .2 * o4
@@ -328,7 +340,7 @@ def test_dobsio():
328340
o5 /= co2[0]
329341
o5.tag = 2 * otag
330342

331-
tt1 = pe.Obs([np.random.rand(100), np.random.rand(100)], ['t|r1', 't|r2'], idl=[range(2, 202, 2), range(22, 222, 2)])
343+
tt1 = pe.Obs([np.random.rand(100), np.random.rand(102)], ['t|r1', 't|r2'], idl=[range(2, 202, 2), range(22, 226, 2)])
332344
tt3 = pe.Obs([np.random.rand(102)], ['qe|r1'])
333345

334346
tt = tt1 + tt3
@@ -337,7 +349,7 @@ def test_dobsio():
337349

338350
tt4 = pe.Obs([np.random.rand(100), np.random.rand(100)], ['t|r1', 't|r2'], idl=[range(1, 101, 1), range(2, 202, 2)])
339351

340-
ol = [o2, o3, o4, do, o5, tt, tt4, np.log(tt4 / o5**2), np.exp(o5 + np.log(co3 / tt3 + o4) / tt)]
352+
ol = [o2, o3, o4, do, o5, tt, tt4, np.log(tt4 / o5**2), np.exp(o5 + np.log(co3 / tt3 + o4) / tt), o4.reweight(o4)]
341353
print(ol)
342354
fname = 'test_rw'
343355

@@ -362,19 +374,25 @@ def test_dobsio():
362374

363375

364376
def test_reconstruct_non_linear_r_obs(tmp_path):
365-
to = pe.Obs([np.random.rand(500), np.random.rand(500), np.random.rand(111)],
366-
["e|r1", "e|r2", "my_new_ensemble_54^£$|8'[@124435%6^7&()~#"],
367-
idl=[range(1, 501), range(0, 500), range(1, 999, 9)])
377+
to = (
378+
pe.Obs([np.random.rand(500), np.random.rand(1200)],
379+
["e|r1", "e|r2", ],
380+
idl=[range(1, 501), range(0, 1200)])
381+
+ pe.Obs([np.random.rand(111)], ["my_new_ensemble_54^£$|8'[@124435%6^7&()~#"], idl=[range(1, 999, 9)])
382+
)
368383
to = np.log(to ** 2) / to
369384
to.dump((tmp_path / "test_equality").as_posix())
370385
ro = pe.input.json.load_json((tmp_path / "test_equality").as_posix())
371386
assert assert_equal_Obs(to, ro)
372387

373388

374389
def test_reconstruct_non_linear_r_obs_list(tmp_path):
375-
to = pe.Obs([np.random.rand(500), np.random.rand(500), np.random.rand(111)],
376-
["e|r1", "e|r2", "my_new_ensemble_54^£$|8'[@124435%6^7&()~#"],
377-
idl=[range(1, 501), range(0, 500), range(1, 999, 9)])
390+
to = (
391+
pe.Obs([np.random.rand(500), np.random.rand(1200)],
392+
["e|r1", "e|r2", ],
393+
idl=[range(1, 501), range(0, 1200)])
394+
+ pe.Obs([np.random.rand(111)], ["my_new_ensemble_54^£$|8'[@124435%6^7&()~#"], idl=[range(1, 999, 9)])
395+
)
378396
to = np.log(to ** 2) / to
379397
for to_list in [[to, to, to], np.array([to, to, to])]:
380398
pe.input.json.dump_to_json(to_list, (tmp_path / "test_equality_list").as_posix())

tests/linalg_test.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def test_matmul():
3434
my_list = []
3535
length = 100 + np.random.randint(200)
3636
for i in range(dim ** 2):
37-
my_list.append(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']))
37+
my_list.append(pe.Obs([np.random.rand(length)], ['t1']) + pe.Obs([np.random.rand(length + 1)], ['t2']))
3838
my_array = const * np.array(my_list).reshape((dim, dim))
3939
tt = pe.linalg.matmul(my_array, my_array) - my_array @ my_array
4040
for t, e in np.ndenumerate(tt):
@@ -43,8 +43,8 @@ def test_matmul():
4343
my_list = []
4444
length = 100 + np.random.randint(200)
4545
for i in range(dim ** 2):
46-
my_list.append(pe.CObs(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']),
47-
pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2'])))
46+
my_list.append(pe.CObs(pe.Obs([np.random.rand(length)], ['t1']) + pe.Obs([np.random.rand(length + 1)], ['t2']),
47+
pe.Obs([np.random.rand(length)], ['t1']) + pe.Obs([np.random.rand(length + 1)], ['t2'])))
4848
my_array = np.array(my_list).reshape((dim, dim)) * const
4949
tt = pe.linalg.matmul(my_array, my_array) - my_array @ my_array
5050
for t, e in np.ndenumerate(tt):
@@ -151,7 +151,7 @@ def test_multi_dot():
151151
my_list = []
152152
length = 1000 + np.random.randint(200)
153153
for i in range(dim ** 2):
154-
my_list.append(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']))
154+
my_list.append(pe.Obs([np.random.rand(length)], ['t1']) + pe.Obs([np.random.rand(length + 1)], ['t2']))
155155
my_array = pe.cov_Obs(1.0, 0.002, 'cov') * np.array(my_list).reshape((dim, dim))
156156
tt = pe.linalg.matmul(my_array, my_array, my_array, my_array) - my_array @ my_array @ my_array @ my_array
157157
for t, e in np.ndenumerate(tt):
@@ -160,8 +160,8 @@ def test_multi_dot():
160160
my_list = []
161161
length = 1000 + np.random.randint(200)
162162
for i in range(dim ** 2):
163-
my_list.append(pe.CObs(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']),
164-
pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2'])))
163+
my_list.append(pe.CObs(pe.Obs([np.random.rand(length)], ['t1']) + pe.Obs([np.random.rand(length + 1)], ['t2']),
164+
pe.Obs([np.random.rand(length)], ['t1']) + pe.Obs([np.random.rand(length + 1)], ['t2'])))
165165
my_array = np.array(my_list).reshape((dim, dim)) * pe.cov_Obs(1.0, 0.002, 'cov')
166166
tt = pe.linalg.matmul(my_array, my_array, my_array, my_array) - my_array @ my_array @ my_array @ my_array
167167
for t, e in np.ndenumerate(tt):
@@ -209,7 +209,7 @@ def test_irregular_matrix_inverse():
209209
for idl in [range(8, 508, 10), range(250, 273), [2, 8, 19, 20, 78, 99, 828, 10548979]]:
210210
irregular_array = []
211211
for i in range(dim ** 2):
212-
irregular_array.append(pe.Obs([np.random.normal(1.1, 0.2, len(idl)), np.random.normal(0.25, 0.1, 10)], ['ens1', 'ens2'], idl=[idl, range(1, 11)]))
212+
irregular_array.append(pe.Obs([np.random.normal(1.1, 0.2, len(idl))], ['ens1'], idl=[idl]) + pe.Obs([np.random.normal(0.25, 0.1, 10)], ['ens2'], idl=[range(1, 11)]))
213213
irregular_matrix = np.array(irregular_array).reshape((dim, dim)) * pe.cov_Obs(1.0, 0.002, 'cov') * pe.pseudo_Obs(1.0, 0.002, 'ens2|r23')
214214

215215
invertible_irregular_matrix = np.identity(dim) + irregular_matrix @ irregular_matrix.T

0 commit comments

Comments (0)