Skip to content

Commit 4c9eb26

Browse files
authored
Merge pull request #113 from cokelaer/main
implement --exclude-pattern option
2 parents bda77e2 + b38b14b commit 4c9eb26

File tree

6 files changed

+52
-9
lines changed

6 files changed

+52
-9
lines changed

README.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,8 @@ Changelog
313313
========= ======================================================================
314314
Version Description
315315
========= ======================================================================
316+
1.1.0 * add exclude_pattern in input data section
317+
1.0.6 * add py3.12, slight updates wrt slurm
316318
1.0.5 * introspect slurm files to extract stats
317319
1.0.4 * add utility function to download and untar a tar.gz file
318320
1.0.3 * add levenshtein function. some typo corrections.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ build-backend = "poetry.core.masonry.api"
66
#maintainer ?#maintainer email
77
[tool.poetry]
88
name = "sequana_pipetools"
9-
version = "1.0.6"
9+
version = "1.1.0"
1010
description = "A set of tools to help building or using Sequana pipelines"
1111
authors = ["Sequana Team"]
1212
license = "BSD-3"

sequana_pipetools/options.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,9 @@ def deps_callback(ctx, param, value):
215215
data = fin.read()
216216
data = data.split()
217217
data = "\n".join(sorted(data))
218-
click.echo(f"Those software will be required for the pipeline to work correctly:\n\n{data}\n")
218+
click.echo(
219+
f"sequana_{ctx.NAME} will need one or more of these software to work correctly. We recommend you to use --apptainer option so that you do not need to install them manually:\n\n{data}\n"
220+
)
219221
ctx.exit(0)
220222

221223

@@ -302,11 +304,17 @@ def __init__(self, working_directory="analysis", caller=None):
302304

303305
class ClickInputOptions:
304306
group_name = "Data"
305-
metadata = {"name": group_name, "options": ["--input-directory", "--input-pattern", "--input-readtag"]}
307+
metadata = {
308+
"name": group_name,
309+
"options": ["--input-directory", "--input-pattern", "--input-readtag", "--exclude-pattern"],
310+
}
306311

307-
def __init__(self, input_directory=".", input_pattern="*fastq.gz", add_input_readtag=True, caller=None):
312+
def __init__(
313+
self, input_directory=".", input_pattern="*fastq.gz", add_input_readtag=True, caller=None, exclude_pattern=None
314+
):
308315
self.input_directory = input_directory
309316
self.input_pattern = input_pattern
317+
self.exclude_pattern = exclude_pattern
310318
self.add_input_readtag = add_input_readtag
311319

312320
self.options = [
@@ -327,6 +335,14 @@ def __init__(self, input_directory=".", input_pattern="*fastq.gz", add_input_rea
327335
show_default=True,
328336
help=f"pattern for the input files ({input_pattern})",
329337
),
338+
click.option(
339+
"--exclude-pattern",
340+
"exclude_pattern",
341+
default=self.exclude_pattern,
342+
type=click.STRING,
343+
show_default=True,
344+
help=f"pattern for excluding input files ({exclude_pattern})",
345+
),
330346
]
331347

332348
if self.add_input_readtag:

sequana_pipetools/sequana_manager.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,8 +178,11 @@ def fill_data_options(self):
178178
cfg.input_directory = os.path.abspath(options["input_directory"])
179179
if "--input-readtag" in sys.argv:
180180
cfg.input_readtag = options["input_readtag"]
181+
if "--exclude-pattern" in sys.argv:
182+
cfg.exclude_pattern = options["exclude_pattern"]
181183
else:
182184
cfg.input_pattern = options.input_pattern
185+
cfg.exclude_pattern = options.exclude_pattern
183186
cfg.input_readtag = options.input_readtag
184187
cfg.input_directory = os.path.abspath(options.input_directory)
185188

@@ -258,6 +261,10 @@ def check_input_files(self, stop_on_error=True):
258261
cfg = self.config.config
259262

260263
filenames = glob.glob(cfg.input_directory + os.sep + cfg.input_pattern)
264+
265+
# This filtering is only informative; the actual file selection is done in snaketools.pipeline_manager
266+
if cfg.get("exclude_pattern", None) and cfg.get("exclude_pattern"):
267+
filenames = [x for x in filenames if cfg.get("exclude_pattern") not in x.split("/")[-1]]
261268
logger.info(
262269
f"\u2705 Found {len(filenames)} files matching your input pattern ({cfg.input_pattern}) in {cfg.input_directory}"
263270
)

sequana_pipetools/snaketools/file_factory.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ class FileFactory:
7171
7272
"""
7373

74-
def __init__(self, pattern, extra_prefixes_to_strip=[], sample_pattern=None, **kwargs):
74+
def __init__(self, pattern, extra_prefixes_to_strip=[], sample_pattern=None, exclude_pattern=None, **kwargs):
7575
""".. rubric:: Constructor
7676
7777
:param pattern: can be a filename, list of filenames, or a global
@@ -92,6 +92,7 @@ def __init__(self, pattern, extra_prefixes_to_strip=[], sample_pattern=None, **k
9292
self.pattern = pattern
9393
self.extra_prefixes_to_strip = extra_prefixes_to_strip
9494
self.sample_pattern = sample_pattern
95+
self.exclude_pattern = exclude_pattern
9596

9697
try:
9798
if os.path.exists(pattern):
@@ -108,6 +109,10 @@ def __init__(self, pattern, extra_prefixes_to_strip=[], sample_pattern=None, **k
108109
# remove directories if they exist
109110
self._glob = [x for x in self._glob if not os.path.isdir(x)]
110111

112+
# remove candidates that have the exclude pattern
113+
if self.exclude_pattern: # pragma: no cover
114+
self._glob = [x for x in self._glob if not self.exclude_pattern in x]
115+
111116
def _get_realpaths(self):
112117
return [os.path.realpath(filename) for filename in self._glob]
113118

@@ -156,7 +161,7 @@ def func(filename):
156161

157162
if filename.startswith(prefix) and filename.endswith(suffix):
158163
res = res[len(prefix) : len(res) - len(suffix)]
159-
else:
164+
else: # pragma: no cover
160165
raise PipetoolsException(f"Your sample pattern does not match the filename {filename}")
161166
else:
162167
res = filename[:]
@@ -246,6 +251,7 @@ def __init__(
246251
read_tag="_R[12]_",
247252
extra_prefixes_to_strip=[],
248253
sample_pattern=None,
254+
exclude_pattern=None,
249255
**kwargs,
250256
):
251257
r""".. rubric:: Constructor
@@ -267,7 +273,10 @@ def __init__(
267273
and your sample will be only 'A'.
268274
"""
269275
super(FastQFactory, self).__init__(
270-
pattern, extra_prefixes_to_strip=extra_prefixes_to_strip, sample_pattern=sample_pattern
276+
pattern,
277+
extra_prefixes_to_strip=extra_prefixes_to_strip,
278+
sample_pattern=sample_pattern,
279+
exclude_pattern=exclude_pattern,
271280
)
272281

273282
self.read_tag = read_tag

sequana_pipetools/snaketools/pipeline_manager.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,6 @@ def teardown(self, extra_dirs_to_remove=[], extra_files_to_remove=[], outdir="."
157157
cleaner.add_makefile()
158158

159159
# create the version file given the requirements
160-
161160
if os.path.exists(f"{outdir}/.sequana/tools.txt"):
162161
with open(f"{outdir}/.sequana/tools.txt", "r") as fin:
163162
deps = fin.readlines()
@@ -254,6 +253,10 @@ class PipelineManager(PipelineManagerBase):
254253
- input_readtag: "_R[12]_"
255254
- input_pattern: "*.fastq.gz"
256255
256+
and an optional option:
257+
258+
- exclude_pattern:
259+
257260
You may omit the input_readtag, which is not required for non-paired data. For instance for
258261
pacbio and nanopore files, which are not paired, the read tag is not required. Instead, if
259262
you are dealing with Illumina/MGI data sets, you must provide this field IF AND ONLY IF you want
@@ -342,6 +345,7 @@ def __init__(
342345
sample_func=None,
343346
extra_prefixes_to_strip=[],
344347
sample_pattern=None,
348+
exclude_pattern=None,
345349
**kwargs,
346350
):
347351
""".. rubric:: Constructor
@@ -371,6 +375,7 @@ def __init__(
371375
# can be provided in the config file or arguments
372376
self.sample_pattern = cfg.config.get("sample_pattern", sample_pattern)
373377
self.extra_prefixes_to_strip = cfg.config.get("extra_prefixes_to_strip", extra_prefixes_to_strip)
378+
self.exclude_pattern = cfg.config.get("exclude_pattern", exclude_pattern)
374379

375380
# if input_directory is not filled, the input_pattern, if valid, will be used instead and must
376381
# be provided anyway.
@@ -446,6 +451,7 @@ def _get_fastq_files(self, glob_dir, read_tag):
446451
read_tag=read_tag,
447452
extra_prefixes_to_strip=self.extra_prefixes_to_strip,
448453
sample_pattern=self.sample_pattern,
454+
exclude_pattern=self.exclude_pattern,
449455
)
450456

451457
# check whether it is paired or not. This is just to raise an error when
@@ -470,7 +476,10 @@ def _get_fastq_files(self, glob_dir, read_tag):
470476

471477
def _get_any_files(self, pattern):
472478
self.ff = FileFactory(
473-
pattern, extra_prefixes_to_strip=self.extra_prefixes_to_strip, sample_pattern=self.sample_pattern
479+
pattern,
480+
extra_prefixes_to_strip=self.extra_prefixes_to_strip,
481+
sample_pattern=self.sample_pattern,
482+
exclude_pattern=self.exclude_pattern,
474483
)
475484

476485
# samples contains a correspondence between the sample name and the

0 commit comments

Comments
 (0)