|
32 | 32 | import functools
|
33 | 33 | import io
|
34 | 34 | import itertools
|
| 35 | +import json |
35 | 36 | import math
|
36 | 37 | import numbers
|
37 | 38 | import warnings
|
|
46 | 47 | import tskit.combinatorics as combinatorics
|
47 | 48 | import tskit.drawing as drawing
|
48 | 49 | import tskit.metadata as metadata_module
|
| 50 | +import tskit.provenance as provenance |
49 | 51 | import tskit.tables as tables
|
50 | 52 | import tskit.text_formats as text_formats
|
51 | 53 | import tskit.util as util
|
@@ -7091,39 +7093,75 @@ def shift(self, value, sequence_length=None, record_provenance=True):
|
7091 | 7093 | return ts
|
7092 | 7094 |
|
7093 | 7095 | def concatenate(
|
7094 |
| - self, other, *, node_mapping=None, record_provenance=True, **kwargs |
| 7096 | + self, *args, node_mappings=None, record_provenance=True, add_populations=None |
7095 | 7097 | ):
|
7096 |
| - """ |
7097 |
| - Concatenate another tree sequence to the right of this one. This shifts the |
7098 |
| - coordinate system of the other tree sequence rightwards, then calls |
7099 |
| - {meth}`union` with the provided ``node_mapping``. If no node mapping |
7100 |
| - is given, matches sample nodes only, in numerical order. |
| 7098 | + r""" |
| 7099 | + Concatenate a set of tree sequences to the right of this one, by repeatedly |
| 7100 | + calling :meth:`union` with an (optional) |
| 7101 | + node mapping for each tree sequence in ``args``. If any node mapping is |
| 7102 | + ``None``, only the sample nodes are mapped between that input tree sequence |
| 7103 | + and this one, based on the numerical order of sample node IDs. |
7101 | 7104 |
|
7102 | 7105 | .. note::
|
7103 | 7106 | To add gaps between the concatenated tables, use :meth:`shift` or
|
7104 | 7107 | to remove gaps, use :meth:`trim` before concatenating.
|
7105 | 7108 |
|
7106 |
| - :param TableCollection other: The other table collection to add to the right |
7107 |
| - of this one. |
7108 |
| - :param list node_mapping: An array of integers of the same length as the number |
7109 |
| - of nodes in ``other``, where the _k_'th element gives the id of the node in |
7110 |
| - the current table collection corresponding to node _k_ in the other table |
7111 |
| - collection (see :meth:`union`). If None (default), only the sample nodes |
7112 |
| - between the two node tables, in numerical order, are mapped to each other. |
7113 |
| - :param bool record_provenance: If True (default), record details of this call to |
7114 |
| - ``concatenate`` in the returned tree sequence's provenance information |
7115 |
| - (Default: True). |
7116 |
| - :param \\**kwargs: Additional keyword arguments to pass to :meth:`union`, |
7117 |
| - e.g. ``add_populations``. |
7118 |
| - """ |
| 7109 | + :param TreeSequence \*args: A list of other tree sequences to append to |
| 7110 | + the right of this one. |
| 7111 | + :param Union[list, None] node_mappings: A list of node mappings for each |
| 7112 | + input tree sequence in ``args``. Each should either be an array of |
| 7113 | + integers of the same length as the number of nodes in the equivalent |
| 7114 | + input tree sequence (see :meth:`union` for details), or ``None``. |
| 7115 | + If ``None``, only sample nodes are mapped to each other. |
| 7116 | + Default: ``None``, treated as ``[None] * len(args)``. |
| 7117 | + :param bool record_provenance: If True (default), record details of this |
| 7118 | + call to ``concatenate`` in the returned tree sequence's provenance |
| 7119 | + information (Default: True). |
| 7120 | + :param bool add_populations: If True (default), nodes new to ``self`` will |
| 7121 | + be assigned new population IDs (see :meth:`union`). |
| 7122 | + """ |
| 7123 | + if node_mappings is None: |
| 7124 | + node_mappings = [None] * len(args) |
| 7125 | + if add_populations is None: |
| 7126 | + add_populations = True |
| 7127 | + if len(node_mappings) != len(args): |
| 7128 | + raise ValueError( |
| 7129 | + "You must provide the same number of node_mappings as args" |
| 7130 | + ) |
7119 | 7131 |
|
| 7132 | + samples = self.samples() |
7120 | 7133 | tables = self.dump_tables()
|
7121 |
| - tables.concatenate( |
7122 |
| - other.tables, |
7123 |
| - node_mapping=node_mapping, |
7124 |
| - record_provenance=record_provenance, |
7125 |
| - **kwargs, |
7126 |
| - ) |
| 7134 | + tables.drop_index() |
| 7135 | + |
| 7136 | + for node_mapping, other in zip(node_mappings, args): |
| 7137 | + if node_mapping is None: |
| 7138 | + other_samples = other.samples() |
| 7139 | + if len(other_samples) != len(samples): |
| 7140 | + raise ValueError( |
| 7141 | + "each `other` must have the same number of samples as `self`" |
| 7142 | + ) |
| 7143 | + node_mapping = np.full(other.num_nodes, tskit.NULL, dtype=np.int32) |
| 7144 | + node_mapping[other_samples] = samples |
| 7145 | + other_tables = other.dump_tables() |
| 7146 | + other_tables.shift(tables.sequence_length, record_provenance=False) |
| 7147 | + tables.sequence_length = other_tables.sequence_length |
| 7148 | + # NB: should we use a different default for add_populations? |
| 7149 | + tables.union( |
| 7150 | + other_tables, |
| 7151 | + node_mapping=node_mapping, |
| 7152 | + check_shared_equality=False, # Else checks fail with internal samples |
| 7153 | + record_provenance=False, |
| 7154 | + add_populations=add_populations, |
| 7155 | + ) |
| 7156 | + if record_provenance: |
| 7157 | + parameters = { |
| 7158 | + "command": "concatenate", |
| 7159 | + "TODO": "add concatenate parameters", # tricky as both have provenances |
| 7160 | + } |
| 7161 | + tables.provenances.add_row( |
| 7162 | + record=json.dumps(provenance.get_provenance_dict(parameters)) |
| 7163 | + ) |
| 7164 | + |
7127 | 7165 | return tables.tree_sequence()
|
7128 | 7166 |
|
7129 | 7167 | def split_edges(self, time, *, flags=None, population=None, metadata=None):
|
|
0 commit comments