Skip to content

Commit b725d43

Browse files
authored
allow narrowing of inherited types (#535)
1 parent c6cc2e5 commit b725d43

File tree

12 files changed

+257
-19
lines changed

12 files changed

+257
-19
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ COVBASE=coverage run --append
3434

3535
# Updating the Major & Minor version below?
3636
# Don't forget to update setup.py as well
37-
VERSION=8.2.$(shell date +%Y%m%d%H%M%S --utc --date=`git log --first-parent \
37+
VERSION=8.3.$(shell date +%Y%m%d%H%M%S --utc --date=`git log --first-parent \
3838
--max-count=1 --format=format:%cI`)
3939

4040
## all : default task

schema_salad/avro/schema.py

Lines changed: 73 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ def __init__(
303303
type_schema = make_avsc_object(atype, names)
304304
except Exception as e:
305305
raise SchemaParseException(
306-
f'Type property "{atype}" not a valid Avro schema.'
306+
f'Type property "{atype}" not a valid Avro schema: {e}'
307307
) from e
308308
self.set_prop("type", type_schema)
309309
self.set_prop("name", name)
@@ -409,8 +409,8 @@ def __init__(
409409
items_schema = make_avsc_object(items, names)
410410
except Exception as err:
411411
raise SchemaParseException(
412-
f"Items schema ({items}) not a valid Avro schema: (known "
413-
f"names: {list(names.names.keys())})."
412+
f"Items schema ({items}) not a valid Avro schema: {err}. "
413+
f"Known names: {list(names.names.keys())})."
414414
) from err
415415

416416
self.set_prop("items", items_schema)
@@ -451,7 +451,7 @@ def __init__(
451451
new_schema = make_avsc_object(schema, names)
452452
except Exception as err:
453453
raise SchemaParseException(
454-
f"Union item must be a valid Avro schema: {schema}"
454+
f"Union item must be a valid Avro schema: {err}; {schema},"
455455
) from err
456456
# check the new schema
457457
if (
@@ -477,7 +477,7 @@ class RecordSchema(NamedSchema):
477477
def make_field_objects(field_data: List[PropsType], names: Names) -> List[Field]:
478478
"""We're going to need to make message parameters too."""
479479
field_objects = [] # type: List[Field]
480-
field_names = [] # type: List[str]
480+
parsed_fields: Dict[str, PropsType] = {}
481481
for field in field_data:
482482
if hasattr(field, "get") and callable(field.get):
483483
atype = field.get("type")
@@ -504,10 +504,15 @@ def make_field_objects(field_data: List[PropsType], names: Names) -> List[Field]
504504
atype, name, has_default, default, order, names, doc, other_props
505505
)
506506
# make sure field name has not been used yet
507-
if new_field.name in field_names:
508-
fail_msg = f"Field name {new_field.name} already in use."
509-
raise SchemaParseException(fail_msg)
510-
field_names.append(new_field.name)
507+
if new_field.name in parsed_fields:
508+
old_field = parsed_fields[new_field.name]
509+
if not is_subtype(old_field["type"], field["type"]):
510+
raise SchemaParseException(
511+
f"Field name {new_field.name} already in use with "
512+
"incompatible type. "
513+
f"{field['type']} vs {old_field['type']}."
514+
)
515+
parsed_fields[new_field.name] = field
511516
else:
512517
raise SchemaParseException(f"Not a valid field: {field}")
513518
field_objects.append(new_field)
@@ -655,3 +660,62 @@ def make_avsc_object(json_data: JsonDataType, names: Optional[Names] = None) ->
655660
# not for us!
656661
fail_msg = f"Could not make an Avro Schema object from {json_data}."
657662
raise SchemaParseException(fail_msg)
663+
664+
665+
def is_subtype(existing: PropType, new: PropType) -> bool:
    """
    Decide whether ``new`` is an acceptable narrowing of ``existing``.

    The rules, applied in order:

    * Equal specifications are always compatible.
    * When ``existing`` is a list (a union), ``new`` is accepted if it is one
      of the listed members, or if it is itself a list whose every member is
      either listed in ``existing`` or recursively accepted against it.
    * When ``existing`` is the string ``"Any"``, everything is accepted except
      ``None``, ``"null"``, an empty list, or a list containing ``"null"``.
    * When both are mappings carrying the same ``"type"``:
      ``"array"`` compares the ``"items"`` entries, ``"enum"`` compares the
      ``"symbols"`` entries, and ``"record"`` requires every field of ``new``
      to have a same-named field in ``existing`` with a compatible type.
    * Anything else is rejected.

    :param existing: the type specification already registered.
    :param new: the candidate specification that would replace it.
    :returns: ``True`` if ``new`` is compatible with ``existing``.
    """
    if existing == new:
        return True

    # Union on the existing side: membership first, then member-wise check.
    if isinstance(existing, list):
        if new in existing:
            return True
        if isinstance(new, list):
            return all(
                member in existing or is_subtype(existing, cast(PropType, member))
                for member in new
            )
        return False

    # "Any" accepts everything that cannot be null.
    if existing == "Any":
        if new is None or new == "null":
            return False
        if isinstance(new, list):
            # An empty union and a union containing "null" are both refused.
            return bool(new) and "null" not in new
        return True

    # From here on only two mappings with the same declared "type" can match.
    if not (isinstance(existing, dict) and isinstance(new, dict)):
        return False
    if "type" not in existing or "type" not in new:
        return False
    kind = existing["type"]
    if new["type"] != kind:
        return False

    if kind == "array":
        return is_subtype(existing["items"], new["items"])

    if kind == "enum":
        return is_subtype(existing["symbols"], new["symbols"])

    if kind == "record":
        for wanted in cast(List[Dict[str, Any]], new["fields"]):
            matched = False
            for candidate in cast(List[Dict[str, Any]], existing["fields"]):
                if wanted["name"] != candidate["name"]:
                    continue
                if not is_subtype(candidate["type"], wanted["type"]):
                    return False
                matched = True
            if not matched:
                # The new record introduces a field the existing one lacks.
                return False
        return True

    return False

schema_salad/metaschema.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
Type,
2323
Union,
2424
)
25-
from urllib.parse import quote, urlsplit, urlunsplit, urlparse
25+
from urllib.parse import quote, urlparse, urlsplit, urlunsplit
2626
from urllib.request import pathname2url
2727

2828
from ruamel.yaml.comments import CommentedMap

schema_salad/metaschema/metaschema.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,8 @@ $graph:
309309
type: boolean?
310310
doc: |
311311
If true, this record is abstract and may be used as a base for other
312-
records, but is not valid on its own.
312+
records, but is not valid on its own. Inherited fields may be
313+
re-specified to narrow their type.
313314
314315
- name: extends
315316
type:
@@ -321,7 +322,7 @@ $graph:
321322
refScope: 1
322323
doc: |
323324
Indicates that this record inherits fields from one or more base records.
324-
325+
Inherited fields may be re-specified to narrow their type.
325326
- name: specialize
326327
type:
327328
- SpecializeDef[]?

schema_salad/metaschema/salad.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ Contributors:
99
* The developers of Apache Avro
1010
* The developers of JSON-LD
1111
* Nebojša Tijanić <[email protected]>, Seven Bridges Genomics
12+
* Michael R. Crusoe, ELIXIR-DE
1213

1314
# Abstract
1415

@@ -86,6 +87,13 @@ specification, the following changes have been made:
8687
is poorly documented, not included in conformance testing,
8788
and not widely supported.
8889

90+
## Introduction to v1.2
91+
92+
This is the fourth version of the Schema Salad specification. It was created to
93+
ease the development of extensions to CWL v1.2. The only change is that
94+
records can narrow the types of inherited fields if those fields are re-specified
95+
with a matching jsonldPredicate.
96+
8997
## References to Other Specifications
9098

9199
**Javascript Object Notation (JSON)**: http://json.org

schema_salad/python_codegen_support.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
Type,
2020
Union,
2121
)
22-
from urllib.parse import quote, urlsplit, urlunsplit, urlparse
22+
from urllib.parse import quote, urlparse, urlsplit, urlunsplit
2323
from urllib.request import pathname2url
2424

2525
from ruamel.yaml.comments import CommentedMap

0 commit comments

Comments
 (0)