Skip to content

Commit 4e02046

Browse files
committed
fixes import on a typical schema-org.json
1 parent b0ae873 commit 4e02046

File tree

1 file changed

+74
-36
lines changed

1 file changed

+74
-36
lines changed

pygeometa/schemas/schema_org/__init__.py

Lines changed: 74 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -128,43 +128,48 @@ def import_(self, metadata: str) -> dict:
128128

129129
if 'spatialCoverage' in md or 'spatial' in md:
130130
crs = 4326
131-
geo = md['spatialCoverage']['geo']
132-
if geo['@type'] == 'GeoCoordinates':
133-
mcf['spatial']['datatype'] = 'vector'
134-
mcf['spatial']['geomtype'] = 'point'
135-
bbox = [geo['longitude'], geo['latitude'],
136-
geo['longitude'], geo['latitude']]
137-
elif geo['@type'] == 'GeoShape':
138-
mcf['spatial']['datatype'] = 'vector'
139-
mcf['spatial']['geomtype'] = 'polygon'
140-
bt = geo['box'].split()
141-
bbox = bt[1], bt[0], bt[3], bt[2]
142-
143-
mcf['identification']['extents']['spatial'].append({
144-
'bbox': bbox,
145-
'crs': crs
146-
})
131+
mcf['spatial'] = mcf.get('spatial', {})
132+
md['spatial'] = md.get('spatial', md.get('spatialCoverage'))
133+
geo = self.get_first(self.get_first(md, 'spatial', {}), 'geo')
134+
bbox = None
135+
if geo and '@type' in geo.keys():
136+
if geo['@type'] == 'GeoCoordinates':
137+
mcf['spatial']['datatype'] = 'vector'
138+
mcf['spatial']['geomtype'] = 'point'
139+
bbox = [geo['longitude'], geo['latitude'],
140+
geo['longitude'], geo['latitude']]
141+
elif geo['@type'] == 'GeoShape':
142+
mcf['spatial']['datatype'] = 'vector'
143+
mcf['spatial']['geomtype'] = 'polygon'
144+
bt = geo['box'].replace(' ', ',').split()
145+
if len(bt) == 4:
146+
bbox = bt[1], bt[0], bt[3], bt[2]
147+
if bbox:
148+
mcf['identification']['extents']['spatial'].append({
149+
'bbox': bbox,
150+
'crs': crs
151+
})
147152

148153
if 'temporalCoverage' in md:
149-
begin, end = md['temporalCoverage'].split('/')
154+
begin, end = self.get_first(md, 'temporalCoverage', '/').split('/')
150155
mcf['identification']['extents']['temporal'] = [{
151156
'begin': begin,
152157
'end': end
153158
}]
154159

155160
mcf['identification']['language'] = mcf['metadata']['language']
156-
mcf['identification']['title'] = md['name']
157-
mcf['identification']['abstract'] = md['description']
161+
mcf['identification']['title'] = self.get_first(md, 'name')
162+
mcf['identification']['abstract'] = self.get_first(md, 'description')
158163

159164
if 'dateCreated' in md:
160-
mcf['metadata']['identification']['creation'] = md['datePublished']
165+
mcf['identification']['creation'] = self.get_first(md, 'datePublished') # noqa
161166
if 'datePublished' in md:
162-
mcf['metadata']['identification']['publication'] = md['datePublished'] # noqa
167+
mcf['identification']['publication'] = self.get_first(md, 'datePublished') # noqa
163168
if 'dateModified' in md:
164-
mcf['metadata']['identification']['revision'] = md['dateModified']
169+
mcf['identification']['revision'] = self.get_first(md, 'dateModified') # noqa
165170

166171
if 'version' in md:
167-
mcf['metadata']['identification']['edition'] = md['version']
172+
mcf['identification']['edition'] = self.get_first(md, 'version')
168173

169174
mcf['identification']['keywords'] = {
170175
'default': {
@@ -174,9 +179,9 @@ def import_(self, metadata: str) -> dict:
174179

175180
for dist in md['distribution']:
176181
mcf['distribution'][dist['name']] = {
177-
'name': dist['name'],
178-
'type': dist['encodingFormat'],
179-
'url': dist['contentUrl'],
182+
'name': self.get_first(dist, 'name'),
183+
'type': self.get_first(dist, 'encodingFormat'),
184+
'url': self.get_first(dist, 'contentUrl'),
180185
'rel': 'download',
181186
'function': 'download'
182187
}
@@ -185,21 +190,21 @@ def import_(self, metadata: str) -> dict:
185190
if ct in md:
186191
contact = {}
187192
contact['url'] = md[ct]['url']
188-
contact['individualname'] = md[ct]['name']
193+
contact['individualname'] = self.get_first(ct, 'name')
189194
if md[ct]['@type'] == 'Organization':
190-
contact['organization'] = md[ct]['name']
195+
contact['organization'] = self.get_first(ct, 'name')
191196

192197
if 'address' in md[ct]:
193-
contact['address'] = md[ct]['streetAddress']
194-
contact['city'] = md[ct]['addressLocality']
195-
contact['administrativearea'] = md[ct]['addressRegion']
196-
contact['postalcode'] = md[ct]['postalCode']
197-
contact['country'] = md[ct]['addressCountry']
198+
contact['address'] = self.get_first(ct, 'streetAddress')
199+
contact['city'] = self.get_first(ct, 'addressLocality')
200+
contact['administrativearea'] = self.get_first(ct, 'addressRegion') # noqa
201+
contact['postalcode'] = self.get_first(ct, 'postalCode')
202+
contact['country'] = self.get_first(ct, 'addressCountry')
198203

199204
if 'contactPoint' in md[ct]:
200-
cp = md[ct][0]
201-
contact['email'] = cp['email']
202-
contact['fax'] = cp['fax']
205+
cp = self.get_first(ct, 'contactPoint')
206+
contact['email'] = self.get_first(cp, 'email')
207+
contact['fax'] = self.get_first(cp, 'fax')
203208

204209
mcf['contact'][ct] = contact
205210

@@ -522,3 +527,36 @@ def generate_link(self, distribution: dict) -> dict:
522527
link['description'] = desc[0]
523528

524529
return link
530+
531+
def get_first(self, obj, key, default=None):
    """
    Return the first element of a list value, else the value itself

    :param obj: `dict` to look the key up in
    :param key: key of the value to fetch
    :param default: value returned when obj is not a dict, the key is
                    missing, or the stored value is falsy
                    (None, '', 0, [], {})

    :returns: first element (str, num or dict) when the value is a list,
              the value itself when it is not, or default
    """

    # lookups get chained (the result of one call is fed to the next),
    # so obj may arrive as None or a plain string; treat that as missing
    # instead of failing on a .keys() call
    if not isinstance(obj, dict):
        return default

    value = obj.get(key)
    if not value:
        return default

    # any list reaching this point is non-empty: empty lists are falsy
    # and were already answered with default above
    if isinstance(value, list):
        return value[0]

    return value
548+
549+
def get_all(self, obj, key, default=None):
    """
    Return the value stored under key as a list

    :param obj: `dict` to look the key up in
    :param key: key of the value to fetch
    :param default: value returned when obj is not a dict, the key is
                    missing, or the stored value is falsy; a new empty
                    list when omitted or None

    :returns: `list` of elements (a non-list value is wrapped in a
              one-element list), or default
    """

    # build the fallback per call: a list literal in the signature is
    # created once and shared, so a caller mutating the result would
    # change what every later call returns
    if default is None:
        default = []

    # tolerate None or a plain string being passed as obj
    if not isinstance(obj, dict):
        return default

    # look up the key that was passed in, not the literal string 'key'
    value = obj.get(key)
    if not value:
        return default

    if isinstance(value, list):
        return value

    return [value]

0 commit comments

Comments
 (0)