Skip to content

Commit 1779241

Browse files
authored
[Fix] Removed the possibility to create an Obs from data on several replica (#258)
* [Fix] Removed the possibility to create an Obs from data on several replica

* [Fix] extended tests and corrected a small bug in the previous commit

---------

Co-authored-by: Simon Kuberski <[email protected]>
1 parent dd4f852 commit 1779241

File tree

6 files changed

+111
-61
lines changed

6 files changed

+111
-61
lines changed

pyerrors/input/dobs.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -529,7 +529,8 @@ def import_dobs_string(content, full_output=False, separator_insertion=True):
529529
deltas.append(repdeltas)
530530
idl.append(repidl)
531531

532-
res.append(Obs(deltas, obs_names, idl=idl))
532+
obsmeans = [np.average(deltas[j]) for j in range(len(deltas))]
533+
res.append(Obs([np.array(deltas[j]) - obsmeans[j] for j in range(len(obsmeans))], obs_names, idl=idl, means=obsmeans))
533534
res[-1]._value = mean[i]
534535
_check(len(e_names) == ne)
535536

pyerrors/input/json.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -133,10 +133,11 @@ def _nan_Obs_like(obs):
133133
names = []
134134
idl = []
135135
for key, value in obs.idl.items():
136-
samples.append([np.nan] * len(value))
136+
samples.append(np.array([np.nan] * len(value)))
137137
names.append(key)
138138
idl.append(value)
139-
my_obs = Obs(samples, names, idl)
139+
my_obs = Obs(samples, names, idl, means=[np.nan for n in names])
140+
my_obs._value = np.nan
140141
my_obs._covobs = obs._covobs
141142
for name in obs._covobs:
142143
my_obs.names.append(name)
@@ -331,7 +332,8 @@ def get_Obs_from_dict(o):
331332
cd = _gen_covobsd_from_cdatad(o.get('cdata', {}))
332333

333334
if od:
334-
ret = Obs([[ddi[0] + values[0] for ddi in di] for di in od['deltas']], od['names'], idl=od['idl'])
335+
r_offsets = [np.average([ddi[0] for ddi in di]) for di in od['deltas']]
336+
ret = Obs([np.array([ddi[0] for ddi in od['deltas'][i]]) - r_offsets[i] for i in range(len(od['deltas']))], od['names'], idl=od['idl'], means=[ro + values[0] for ro in r_offsets])
335337
ret._value = values[0]
336338
else:
337339
ret = Obs([], [], means=[])
@@ -356,7 +358,8 @@ def get_List_from_dict(o):
356358
taglist = o.get('tag', layout * [None])
357359
for i in range(layout):
358360
if od:
359-
ret.append(Obs([list(di[:, i] + values[i]) for di in od['deltas']], od['names'], idl=od['idl']))
361+
r_offsets = np.array([np.average(di[:, i]) for di in od['deltas']])
362+
ret.append(Obs([od['deltas'][j][:, i] - r_offsets[j] for j in range(len(od['deltas']))], od['names'], idl=od['idl'], means=[ro + values[i] for ro in r_offsets]))
360363
ret[-1]._value = values[i]
361364
else:
362365
ret.append(Obs([], [], means=[]))
@@ -383,7 +386,8 @@ def get_Array_from_dict(o):
383386
taglist = o.get('tag', N * [None])
384387
for i in range(N):
385388
if od:
386-
ret.append(Obs([di[:, i] + values[i] for di in od['deltas']], od['names'], idl=od['idl']))
389+
r_offsets = np.array([np.average(di[:, i]) for di in od['deltas']])
390+
ret.append(Obs([od['deltas'][j][:, i] - r_offsets[j] for j in range(len(od['deltas']))], od['names'], idl=od['idl'], means=[ro + values[i] for ro in r_offsets]))
387391
ret[-1]._value = values[i]
388392
else:
389393
ret.append(Obs([], [], means=[]))

pyerrors/obs.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ def __init__(self, samples, names, idl=None, **kwargs):
8282
raise ValueError('Names are not unique.')
8383
if not all(isinstance(x, str) for x in names):
8484
raise TypeError('All names have to be strings.')
85+
if len(set([o.split('|')[0] for o in names])) > 1:
86+
raise ValueError('Cannot initialize Obs based on multiple ensembles. Please average separate Obs from each ensemble.')
8587
else:
8688
if not isinstance(names[0], str):
8789
raise TypeError('All names have to be strings.')
@@ -1407,6 +1409,8 @@ def reweight(weight, obs, **kwargs):
14071409
raise ValueError('Error: Not possible to reweight an Obs that contains covobs!')
14081410
if not set(obs[i].names).issubset(weight.names):
14091411
raise ValueError('Error: Ensembles do not fit')
1412+
if len(obs[i].mc_names) > 1 or len(weight.mc_names) > 1:
1413+
raise ValueError('Error: Cannot reweight an Obs that contains multiple ensembles.')
14101414
for name in obs[i].names:
14111415
if not set(obs[i].idl[name]).issubset(weight.idl[name]):
14121416
raise ValueError('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
@@ -1442,9 +1446,12 @@ def correlate(obs_a, obs_b):
14421446
-----
14431447
Keep in mind to only correlate primary observables which have not been reweighted
14441448
yet. The reweighting has to be applied after correlating the observables.
1445-
Currently only works if ensembles are identical (this is not strictly necessary).
1449+
Only works if a single ensemble is present in the Obs.
1450+
Currently only works if ensemble content is identical (this is not strictly necessary).
14461451
"""
14471452

1453+
if len(obs_a.mc_names) > 1 or len(obs_b.mc_names) > 1:
1454+
raise ValueError('Error: Cannot correlate Obs that contain multiple ensembles.')
14481455
if sorted(obs_a.names) != sorted(obs_b.names):
14491456
raise ValueError(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
14501457
if len(obs_a.cov_names) or len(obs_b.cov_names):
@@ -1755,7 +1762,11 @@ def import_bootstrap(boots, name, random_numbers):
17551762

17561763

17571764
def merge_obs(list_of_obs):
1758-
"""Combine all observables in list_of_obs into one new observable
1765+
"""Combine all observables in list_of_obs into one new observable.
1766+
This allows to merge Obs that have been computed on multiple replica
1767+
of the same ensemble.
1768+
If you like to merge Obs that are based on several ensembles, please
1769+
average them yourself.
17591770
17601771
Parameters
17611772
----------

tests/json_io_test.py

Lines changed: 56 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def test_jsonio():
1212
o = pe.pseudo_Obs(1.0, .2, 'one')
1313
o2 = pe.pseudo_Obs(0.5, .1, 'two|r1')
1414
o3 = pe.pseudo_Obs(0.5, .1, 'two|r2')
15-
o4 = pe.merge_obs([o2, o3])
15+
o4 = pe.merge_obs([o2, o3, pe.pseudo_Obs(0.5, .1, 'two|r3', samples=3221)])
1616
otag = 'This has been merged!'
1717
o4.tag = otag
1818
do = o - .2 * o4
@@ -101,8 +101,8 @@ def test_json_string_reconstruction():
101101

102102

103103
def test_json_corr_io():
104-
my_list = [pe.Obs([np.random.normal(1.0, 0.1, 100)], ['ens1']) for o in range(8)]
105-
rw_list = pe.reweight(pe.Obs([np.random.normal(1.0, 0.1, 100)], ['ens1']), my_list)
104+
my_list = [pe.Obs([np.random.normal(1.0, 0.1, 100), np.random.normal(1.0, 0.1, 321)], ['ens1|r1', 'ens1|r2'], idl=[range(1, 201, 2), range(321)]) for o in range(8)]
105+
rw_list = pe.reweight(pe.Obs([np.random.normal(1.0, 0.1, 100), np.random.normal(1.0, 0.1, 321)], ['ens1|r1', 'ens1|r2'], idl=[range(1, 201, 2), range(321)]), my_list)
106106

107107
for obs_list in [my_list, rw_list]:
108108
for tag in [None, "test"]:
@@ -111,40 +111,51 @@ def test_json_corr_io():
111111
for corr_tag in [None, 'my_Corr_tag']:
112112
for prange in [None, [3, 6]]:
113113
for gap in [False, True]:
114-
my_corr = pe.Corr(obs_list, padding=[pad, pad], prange=prange)
115-
my_corr.tag = corr_tag
116-
if gap:
117-
my_corr.content[4] = None
118-
pe.input.json.dump_to_json(my_corr, 'corr')
119-
recover = pe.input.json.load_json('corr')
120-
os.remove('corr.json.gz')
121-
assert np.all([o.is_zero() for o in [x for x in (my_corr - recover) if x is not None]])
122-
for index, entry in enumerate(my_corr):
123-
if entry is None:
124-
assert recover[index] is None
125-
assert my_corr.tag == recover.tag
126-
assert my_corr.prange == recover.prange
127-
assert my_corr.reweighted == recover.reweighted
114+
for mult in [1., pe.cov_Obs([12.22, 1.21], [.212**2, .11**2], 'renorm')[0]]:
115+
my_corr = mult * pe.Corr(obs_list, padding=[pad, pad], prange=prange)
116+
my_corr.tag = corr_tag
117+
if gap:
118+
my_corr.content[4] = None
119+
pe.input.json.dump_to_json(my_corr, 'corr')
120+
recover = pe.input.json.load_json('corr')
121+
os.remove('corr.json.gz')
122+
assert np.all([o.is_zero() for o in [x for x in (my_corr - recover) if x is not None]])
123+
for index, entry in enumerate(my_corr):
124+
if entry is None:
125+
assert recover[index] is None
126+
assert my_corr.tag == recover.tag
127+
assert my_corr.prange == recover.prange
128+
assert my_corr.reweighted == recover.reweighted
128129

129130

130131
def test_json_corr_2d_io():
131-
obs_list = [np.array([[pe.pseudo_Obs(1.0 + i, 0.1 * i, 'test'), pe.pseudo_Obs(0.0, 0.1 * i, 'test')], [pe.pseudo_Obs(0.0, 0.1 * i, 'test'), pe.pseudo_Obs(1.0 + i, 0.1 * i, 'test')]]) for i in range(4)]
132+
obs_list = [np.array([
133+
[
134+
pe.merge_obs([pe.pseudo_Obs(1.0 + i, 0.1 * i, 'test|r2'), pe.pseudo_Obs(1.0 + i, 0.1 * i, 'test|r1', samples=321)]),
135+
pe.merge_obs([pe.pseudo_Obs(0.0, 0.1 * i, 'test|r2'), pe.pseudo_Obs(0.0, 0.1 * i, 'test|r1', samples=321)]),
136+
],
137+
[
138+
pe.merge_obs([pe.pseudo_Obs(0.0, 0.1 * i, 'test|r2'), pe.pseudo_Obs(0.0, 0.1 * i, 'test|r1', samples=321),]),
139+
pe.merge_obs([pe.pseudo_Obs(1.0 + i, 0.1 * i, 'test|r2'), pe.pseudo_Obs(1.0 + i, 0.1 * i, 'test|r1', samples=321)]),
140+
],
141+
]) for i in range(4)]
132142

133143
for tag in [None, "test"]:
134144
obs_list[3][0, 1].tag = tag
135145
for padding in [0, 1]:
136146
for prange in [None, [3, 6]]:
137-
my_corr = pe.Corr(obs_list, padding=[padding, padding], prange=prange)
138-
my_corr.tag = tag
139-
pe.input.json.dump_to_json(my_corr, 'corr')
140-
recover = pe.input.json.load_json('corr')
141-
os.remove('corr.json.gz')
142-
assert np.all([np.all([o.is_zero() for o in q]) for q in [x.ravel() for x in (my_corr - recover) if x is not None]])
143-
for index, entry in enumerate(my_corr):
144-
if entry is None:
145-
assert recover[index] is None
146-
assert my_corr.tag == recover.tag
147-
assert my_corr.prange == recover.prange
147+
for mult in [1., pe.cov_Obs([12.22, 1.21], [.212**2, .11**2], 'renorm')[0]]:
148+
my_corr = mult * pe.Corr(obs_list, padding=[padding, padding], prange=prange)
149+
my_corr.tag = tag
150+
pe.input.json.dump_to_json(my_corr, 'corr')
151+
recover = pe.input.json.load_json('corr')
152+
os.remove('corr.json.gz')
153+
assert np.all([np.all([o.is_zero() for o in q]) for q in [x.ravel() for x in (my_corr - recover) if x is not None]])
154+
for index, entry in enumerate(my_corr):
155+
if entry is None:
156+
assert recover[index] is None
157+
assert my_corr.tag == recover.tag
158+
assert my_corr.prange == recover.prange
148159

149160

150161
def test_json_dict_io():
@@ -211,6 +222,7 @@ def list_check_obs(l1, l2):
211222
'd': pe.pseudo_Obs(.01, .001, 'testd', samples=10) * pe.cov_Obs(1, .01, 'cov1'),
212223
'se': None,
213224
'sf': 1.2,
225+
'k': pe.cov_Obs(.1, .001**2, 'cov') * pe.merge_obs([pe.pseudo_Obs(1.0, 0.1, 'test|r2'), pe.pseudo_Obs(1.0, 0.1, 'test|r1', samples=321)]),
214226
}
215227
}
216228

@@ -314,7 +326,7 @@ def test_dobsio():
314326

315327
o2 = pe.pseudo_Obs(0.5, .1, 'two|r1')
316328
o3 = pe.pseudo_Obs(0.5, .1, 'two|r2')
317-
o4 = pe.merge_obs([o2, o3])
329+
o4 = pe.merge_obs([o2, o3, pe.pseudo_Obs(0.5, .1, 'two|r3', samples=3221)])
318330
otag = 'This has been merged!'
319331
o4.tag = otag
320332
do = o - .2 * o4
@@ -328,7 +340,7 @@ def test_dobsio():
328340
o5 /= co2[0]
329341
o5.tag = 2 * otag
330342

331-
tt1 = pe.Obs([np.random.rand(100), np.random.rand(100)], ['t|r1', 't|r2'], idl=[range(2, 202, 2), range(22, 222, 2)])
343+
tt1 = pe.Obs([np.random.rand(100), np.random.rand(102)], ['t|r1', 't|r2'], idl=[range(2, 202, 2), range(22, 226, 2)])
332344
tt3 = pe.Obs([np.random.rand(102)], ['qe|r1'])
333345

334346
tt = tt1 + tt3
@@ -337,7 +349,7 @@ def test_dobsio():
337349

338350
tt4 = pe.Obs([np.random.rand(100), np.random.rand(100)], ['t|r1', 't|r2'], idl=[range(1, 101, 1), range(2, 202, 2)])
339351

340-
ol = [o2, o3, o4, do, o5, tt, tt4, np.log(tt4 / o5**2), np.exp(o5 + np.log(co3 / tt3 + o4) / tt)]
352+
ol = [o2, o3, o4, do, o5, tt, tt4, np.log(tt4 / o5**2), np.exp(o5 + np.log(co3 / tt3 + o4) / tt), o4.reweight(o4)]
341353
print(ol)
342354
fname = 'test_rw'
343355

@@ -362,19 +374,25 @@ def test_dobsio():
362374

363375

364376
def test_reconstruct_non_linear_r_obs(tmp_path):
365-
to = pe.Obs([np.random.rand(500), np.random.rand(500), np.random.rand(111)],
366-
["e|r1", "e|r2", "my_new_ensemble_54^£$|8'[@124435%6^7&()~#"],
367-
idl=[range(1, 501), range(0, 500), range(1, 999, 9)])
377+
to = (
378+
pe.Obs([np.random.rand(500), np.random.rand(1200)],
379+
["e|r1", "e|r2", ],
380+
idl=[range(1, 501), range(0, 1200)])
381+
+ pe.Obs([np.random.rand(111)], ["my_new_ensemble_54^£$|8'[@124435%6^7&()~#"], idl=[range(1, 999, 9)])
382+
)
368383
to = np.log(to ** 2) / to
369384
to.dump((tmp_path / "test_equality").as_posix())
370385
ro = pe.input.json.load_json((tmp_path / "test_equality").as_posix())
371386
assert assert_equal_Obs(to, ro)
372387

373388

374389
def test_reconstruct_non_linear_r_obs_list(tmp_path):
375-
to = pe.Obs([np.random.rand(500), np.random.rand(500), np.random.rand(111)],
376-
["e|r1", "e|r2", "my_new_ensemble_54^£$|8'[@124435%6^7&()~#"],
377-
idl=[range(1, 501), range(0, 500), range(1, 999, 9)])
390+
to = (
391+
pe.Obs([np.random.rand(500), np.random.rand(1200)],
392+
["e|r1", "e|r2", ],
393+
idl=[range(1, 501), range(0, 1200)])
394+
+ pe.Obs([np.random.rand(111)], ["my_new_ensemble_54^£$|8'[@124435%6^7&()~#"], idl=[range(1, 999, 9)])
395+
)
378396
to = np.log(to ** 2) / to
379397
for to_list in [[to, to, to], np.array([to, to, to])]:
380398
pe.input.json.dump_to_json(to_list, (tmp_path / "test_equality_list").as_posix())

tests/linalg_test.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def test_matmul():
3434
my_list = []
3535
length = 100 + np.random.randint(200)
3636
for i in range(dim ** 2):
37-
my_list.append(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']))
37+
my_list.append(pe.Obs([np.random.rand(length)], ['t1']) + pe.Obs([np.random.rand(length + 1)], ['t2']))
3838
my_array = const * np.array(my_list).reshape((dim, dim))
3939
tt = pe.linalg.matmul(my_array, my_array) - my_array @ my_array
4040
for t, e in np.ndenumerate(tt):
@@ -43,8 +43,8 @@ def test_matmul():
4343
my_list = []
4444
length = 100 + np.random.randint(200)
4545
for i in range(dim ** 2):
46-
my_list.append(pe.CObs(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']),
47-
pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2'])))
46+
my_list.append(pe.CObs(pe.Obs([np.random.rand(length)], ['t1']) + pe.Obs([np.random.rand(length + 1)], ['t2']),
47+
pe.Obs([np.random.rand(length)], ['t1']) + pe.Obs([np.random.rand(length + 1)], ['t2'])))
4848
my_array = np.array(my_list).reshape((dim, dim)) * const
4949
tt = pe.linalg.matmul(my_array, my_array) - my_array @ my_array
5050
for t, e in np.ndenumerate(tt):
@@ -151,7 +151,7 @@ def test_multi_dot():
151151
my_list = []
152152
length = 1000 + np.random.randint(200)
153153
for i in range(dim ** 2):
154-
my_list.append(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']))
154+
my_list.append(pe.Obs([np.random.rand(length)], ['t1']) + pe.Obs([np.random.rand(length + 1)], ['t2']))
155155
my_array = pe.cov_Obs(1.0, 0.002, 'cov') * np.array(my_list).reshape((dim, dim))
156156
tt = pe.linalg.matmul(my_array, my_array, my_array, my_array) - my_array @ my_array @ my_array @ my_array
157157
for t, e in np.ndenumerate(tt):
@@ -160,8 +160,8 @@ def test_multi_dot():
160160
my_list = []
161161
length = 1000 + np.random.randint(200)
162162
for i in range(dim ** 2):
163-
my_list.append(pe.CObs(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']),
164-
pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2'])))
163+
my_list.append(pe.CObs(pe.Obs([np.random.rand(length)], ['t1']) + pe.Obs([np.random.rand(length + 1)], ['t2']),
164+
pe.Obs([np.random.rand(length)], ['t1']) + pe.Obs([np.random.rand(length + 1)], ['t2'])))
165165
my_array = np.array(my_list).reshape((dim, dim)) * pe.cov_Obs(1.0, 0.002, 'cov')
166166
tt = pe.linalg.matmul(my_array, my_array, my_array, my_array) - my_array @ my_array @ my_array @ my_array
167167
for t, e in np.ndenumerate(tt):
@@ -209,7 +209,7 @@ def test_irregular_matrix_inverse():
209209
for idl in [range(8, 508, 10), range(250, 273), [2, 8, 19, 20, 78, 99, 828, 10548979]]:
210210
irregular_array = []
211211
for i in range(dim ** 2):
212-
irregular_array.append(pe.Obs([np.random.normal(1.1, 0.2, len(idl)), np.random.normal(0.25, 0.1, 10)], ['ens1', 'ens2'], idl=[idl, range(1, 11)]))
212+
irregular_array.append(pe.Obs([np.random.normal(1.1, 0.2, len(idl))], ['ens1'], idl=[idl]) + pe.Obs([np.random.normal(0.25, 0.1, 10)], ['ens2'], idl=[range(1, 11)]))
213213
irregular_matrix = np.array(irregular_array).reshape((dim, dim)) * pe.cov_Obs(1.0, 0.002, 'cov') * pe.pseudo_Obs(1.0, 0.002, 'ens2|r23')
214214

215215
invertible_irregular_matrix = np.identity(dim) + irregular_matrix @ irregular_matrix.T

0 commit comments

Comments (0)