Skip to content

Make some slots on MassSpectrometryConfiguration and ChromatographyConfiguration required and create a migrator #2465

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from nmdc_schema.migrators.partials.migrator_from_11_7_0_to_11_8_0 import migrator_from_11_7_0_to_11_8_0_part_1
from nmdc_schema.migrators.partials.migrator_from_11_7_0_to_11_8_0 import migrator_from_11_7_0_to_11_8_0_part_2
from nmdc_schema.migrators.partials.migrator_from_11_7_0_to_11_8_0 import migrator_from_11_7_0_to_11_8_0_part_3
from nmdc_schema.migrators.partials.migrator_from_11_7_0_to_11_8_0 import migrator_from_11_7_0_to_11_8_0_part_4




Expand All @@ -27,5 +29,6 @@ def get_migrator_classes() -> List[Type[MigratorBase]]:
migrator_from_11_7_0_to_11_8_0_part_1.Migrator,
migrator_from_11_7_0_to_11_8_0_part_2.Migrator,
migrator_from_11_7_0_to_11_8_0_part_3.Migrator,
migrator_from_11_7_0_to_11_8_0_part_4.Migrator,
]

Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
from nmdc_schema.migrators.migrator_base import MigratorBase

class Migrator(MigratorBase):
    r"""Migrates a database between two schemas.

    This migrator does not modify any document. It only checks that every
    `MassSpectrometryConfiguration` and `ChromatographyConfiguration` document
    in the `configuration_set` collection has a value for each slot listed in
    `_REQUIRED_MASS_SPEC_SLOTS` / `_REQUIRED_CHROM_SLOTS`, and raises a
    `ValueError` for the first document/slot that does not.
    """

    _from_version = "11.8.0.part_3"
    _to_version = "11.8.0.part_4"

    # Slots that must have a value on each `nmdc:MassSpectrometryConfiguration`
    # document. They are checked in this order, so when several slots are
    # missing, the error names the earliest one listed here.
    _REQUIRED_MASS_SPEC_SLOTS = (
        "mass_spectrometry_acquisition_strategy",
        "resolution_categories",
        "mass_analyzers",
        "ionization_source",
        "mass_spectrum_collection_modes",
        "polarity_mode",
    )

    # Slots that must have a value on each `nmdc:ChromatographyConfiguration`
    # document (checked in this order).
    _REQUIRED_CHROM_SLOTS = (
        "chromatographic_category",
        "stationary_phase",
    )

    def upgrade(self) -> None:
        r"""Migrates the database from conforming to the original schema, to conforming to the new schema.

        Runs both validators over every document in `configuration_set`. Each
        validator ignores documents whose `type` it is not responsible for.
        """
        self.adapter.do_for_each_document("configuration_set", self.validate_mass_spec_config_slots)
        self.adapter.do_for_each_document("configuration_set", self.validate_chrom_config_slots)

    @staticmethod
    def _require_slots(configuration_record: dict, required_slots: tuple) -> None:
        r"""Raises a `ValueError` naming the first slot in `required_slots` that is absent or `None`.

        A slot whose key is present but whose value is `None` is treated the
        same as a slot whose key is absent. Other falsy values (e.g. `""` or
        `[]`) are accepted.
        """
        for slot_name in required_slots:
            if configuration_record.get(slot_name) is None:
                raise ValueError(
                    f"`{slot_name}` is required and is not present in the configuration record {configuration_record.get('id')}"
                )

    def validate_mass_spec_config_slots(self, configuration_record: dict) -> None:
        r"""
        If the configuration record is of type `nmdc:MassSpectrometryConfiguration` and ANY of the slots
        `mass_spectrometry_acquisition_strategy`, `resolution_categories`, `mass_analyzers`, `ionization_source`,
        `mass_spectrum_collection_modes`, or `polarity_mode` is absent (or is `None`), raises a `ValueError`
        naming the first such slot. Records of any other type are ignored.

        >>> m = Migrator()
        >>> m.validate_mass_spec_config_slots({"id": 123, "type": "nmdc:MassSpectrometryConfiguration", "mass_spectrometry_acquisition_strategy": "data_independent_acquisition", "resolution_categories": "high", "mass_analyzers": "ion_trap", "ionization_source": "electron_ionization", "mass_spectrum_collection_modes": "centroid", "polarity_mode": "positive"})
        >>> m.validate_mass_spec_config_slots({"id": 123, "type": "nmdc:MassSpectrometryConfiguration", "mass_spectrometry_acquisition_strategy": "data_independent_acquisition", "resolution_categories": "high", "mass_analyzers": "ion_trap", "ionization_source": "electron_ionization", "mass_spectrum_collection_modes": "centroid"})
        Traceback (most recent call last):
            ...
        ValueError: `polarity_mode` is required and is not present in the configuration record 123
        >>> m.validate_mass_spec_config_slots({"id": 123, "type": "nmdc:MassSpectrometryConfiguration", "mass_spectrometry_acquisition_strategy": "data_independent_acquisition", "resolution_categories": "high", "mass_analyzers": "ion_trap", "ionization_source": "electron_ionization", "polarity_mode": "positive"})
        Traceback (most recent call last):
            ...
        ValueError: `mass_spectrum_collection_modes` is required and is not present in the configuration record 123
        >>> m.validate_mass_spec_config_slots({"id": 123, "type": "nmdc:MassSpectrometryConfiguration", "mass_spectrometry_acquisition_strategy": "data_independent_acquisition", "resolution_categories": "high", "mass_analyzers": "ion_trap", "mass_spectrum_collection_modes": "centroid", "polarity_mode": "positive"})
        Traceback (most recent call last):
            ...
        ValueError: `ionization_source` is required and is not present in the configuration record 123
        >>> m.validate_mass_spec_config_slots({"id": 123, "type": "nmdc:MassSpectrometryConfiguration", "mass_spectrometry_acquisition_strategy": "data_independent_acquisition", "resolution_categories": "high", "mass_spectrum_collection_modes": "centroid", "polarity_mode": "positive"})
        Traceback (most recent call last):
            ...
        ValueError: `mass_analyzers` is required and is not present in the configuration record 123
        >>> m.validate_mass_spec_config_slots({"id": 123, "type": "nmdc:MassSpectrometryConfiguration", "mass_spectrometry_acquisition_strategy": "data_independent_acquisition", "mass_analyzers": "ion_trap", "ionization_source": "electron_ionization", "mass_spectrum_collection_modes": "centroid", "polarity_mode": "positive"})
        Traceback (most recent call last):
            ...
        ValueError: `resolution_categories` is required and is not present in the configuration record 123
        >>> m.validate_mass_spec_config_slots({"id": 123, "type": "nmdc:MassSpectrometryConfiguration", "resolution_categories": "high", "mass_analyzers": "ion_trap", "ionization_source": "electron_ionization", "mass_spectrum_collection_modes": "centroid", "polarity_mode": "positive"})
        Traceback (most recent call last):
            ...
        ValueError: `mass_spectrometry_acquisition_strategy` is required and is not present in the configuration record 123
        """
        # Only mass spectrometry configurations are subject to these checks.
        if configuration_record.get("type") != "nmdc:MassSpectrometryConfiguration":
            return
        self._require_slots(configuration_record, self._REQUIRED_MASS_SPEC_SLOTS)

    def validate_chrom_config_slots(self, configuration_record: dict) -> None:
        r"""
        If the configuration record is of type `nmdc:ChromatographyConfiguration` and EITHER of the slots
        `chromatographic_category` or `stationary_phase` is absent (or is `None`), raises a `ValueError`
        naming the first such slot. Records of any other type are ignored.

        >>> m = Migrator()
        >>> m.validate_chrom_config_slots({"id": 123, "type": "nmdc:ChromatographyConfiguration", "chromatographic_category": "gas_chromatography", "stationary_phase": "C18"})
        >>> m.validate_chrom_config_slots({"id": 123, "type": "nmdc:ChromatographyConfiguration", "chromatographic_category": "gas_chromatography"})
        Traceback (most recent call last):
            ...
        ValueError: `stationary_phase` is required and is not present in the configuration record 123
        >>> m.validate_chrom_config_slots({"id": 123, "type": "nmdc:ChromatographyConfiguration", "stationary_phase": "C18"})
        Traceback (most recent call last):
            ...
        ValueError: `chromatographic_category` is required and is not present in the configuration record 123
        """
        # Only chromatography configurations are subject to these checks.
        if configuration_record.get("type") != "nmdc:ChromatographyConfiguration":
            return
        self._require_slots(configuration_record, self._REQUIRED_CHROM_SLOTS)

Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# this is invalid because it is missing the required field `stationary_phase`
id: nmdc:chrcon-99-oW43DzG0
type: nmdc:ChromatographyConfiguration
name: "EMSL GC method for small molecules"
description: "EMSL's GC method for small molecule analysis"
chromatographic_category: gas_chromatography
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# this is invalid because it is missing the required field `chromatographic_category`
id: nmdc:chrcon-99-oW43DzG0
type: nmdc:ChromatographyConfiguration
name: "EMSL LC method for non-polar metabolites"
description: "EMSL's LC method for non-polar metabolites"
stationary_phase: C18
13 changes: 13 additions & 0 deletions src/data/invalid/MassSpectrometryConfiguration-invalid-no_is.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# this is invalid because it is missing the required field `ionization_source`
id: nmdc:mscon-99-oW43DzG0
name: EMSL_NOM_method1
description: Mass Spectrometry method used by EMSL for NOM analysis
type: nmdc:MassSpectrometryConfiguration
mass_spectrometry_acquisition_strategy: full_scan_only
resolution_categories:
- high
mass_analyzers:
- ion_cyclotron_resonance
mass_spectrum_collection_modes:
- full_profile
polarity_mode: negative
12 changes: 12 additions & 0 deletions src/data/invalid/MassSpectrometryConfiguration-invalid-no_ma.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# this is invalid because it is missing the required field `mass_analyzers`
id: nmdc:mscon-99-oW43DzG0
name: EMSL_NOM_method1
description: Mass Spectrometry method used by EMSL for NOM analysis
type: nmdc:MassSpectrometryConfiguration
mass_spectrometry_acquisition_strategy: full_scan_only
resolution_categories:
- high
ionization_source: electrospray_ionization
mass_spectrum_collection_modes:
- full_profile
polarity_mode: negative
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# this is invalid because it is missing the required field `mass_spectrometry_acquisition_strategy`
id: nmdc:mscon-99-oW43DzG0
name: EMSL_NOM_method1
description: Mass Spectrometry method used by EMSL for NOM analysis
type: nmdc:MassSpectrometryConfiguration
resolution_categories:
- high
mass_analyzers:
- ion_cyclotron_resonance
ionization_source: electrospray_ionization
mass_spectrum_collection_modes:
- full_profile
polarity_mode: negative
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# this is invalid because it is missing the required field `mass_spectrum_collection_modes`
id: nmdc:mscon-99-oW43DzG0
name: EMSL_NOM_method1
description: Mass Spectrometry method used by EMSL for NOM analysis
type: nmdc:MassSpectrometryConfiguration
mass_spectrometry_acquisition_strategy: full_scan_only
resolution_categories:
- high
mass_analyzers:
- ion_cyclotron_resonance
ionization_source: electrospray_ionization
polarity_mode: negative
13 changes: 13 additions & 0 deletions src/data/invalid/MassSpectrometryConfiguration-invalid-no_pm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# this is invalid because it is missing the required field `polarity_mode`
id: nmdc:mscon-99-oW43DzG0
name: EMSL_NOM_method1
description: Mass Spectrometry method used by EMSL for NOM analysis
type: nmdc:MassSpectrometryConfiguration
mass_spectrometry_acquisition_strategy: full_scan_only
resolution_categories:
- high
mass_analyzers:
- ion_cyclotron_resonance
ionization_source: electrospray_ionization
mass_spectrum_collection_modes:
- full_profile
12 changes: 12 additions & 0 deletions src/data/invalid/MassSpectrometryConfiguration-invalid-no_rc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# this is invalid because it is missing the required field `resolution_categories`
id: nmdc:mscon-99-oW43DzG0
name: EMSL_NOM_method1
description: Mass Spectrometry method used by EMSL for NOM analysis
type: nmdc:MassSpectrometryConfiguration
mass_spectrometry_acquisition_strategy: full_scan_only
mass_analyzers:
- ion_cyclotron_resonance
ionization_source: electrospray_ionization
mass_spectrum_collection_modes:
- full_profile
polarity_mode: negative
1 change: 1 addition & 0 deletions src/data/valid/Database-mass_spectrometry_gc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ configuration_set:
type: nmdc:MassSpectrometryConfiguration
name: "EMSL EI mass spectrometry method for small molecules"
description: "Electron impact mass spectrometry method for small molecules"
mass_spectrometry_acquisition_strategy: data_dependent_acquisition
ionization_source: electron_ionization
mass_analyzers:
- quadrupole
Expand Down
16 changes: 16 additions & 0 deletions src/schema/nmdc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,18 @@ classes:
structured_pattern:
syntax: "{id_nmdc_prefix}:mscon-{id_shoulder}-{id_blade}$"
interpolated: true
mass_spectrometry_acquisition_strategy:
required: true
resolution_categories:
required: true
mass_analyzers:
required: true
ionization_source:
required: true
mass_spectrum_collection_modes:
required: true
polarity_mode:
required: true

ChromatographyConfiguration:
is_a: Configuration
Expand All @@ -289,6 +301,10 @@ classes:
structured_pattern:
syntax: "{id_nmdc_prefix}:chrcon-{id_shoulder}-{id_blade}$"
interpolated: true
chromatographic_category:
required: true
stationary_phase:
required: true

Manifest:
is_a: InformationObject
Expand Down