Skip to content

Commit 5132291

Browse files
committed
fixes import on a typical schema-org.json
1 parent 9b941bf commit 5132291

File tree

1 file changed

+76
-36
lines changed

1 file changed

+76
-36
lines changed

pygeometa/schemas/schema_org/__init__.py

Lines changed: 76 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -113,43 +113,48 @@ def import_(self, metadata: str) -> dict:
113113

114114
if 'spatialCoverage' in md or 'spatial' in md:
115115
crs = 4326
116-
geo = md['spatialCoverage']['geo']
117-
if geo['@type'] == 'GeoCoordinates':
118-
mcf['spatial']['datatype'] = 'vector'
119-
mcf['spatial']['geomtype'] = 'point'
120-
bbox = [geo['longitude'], geo['latitude'],
121-
geo['longitude'], geo['latitude']]
122-
elif geo['@type'] == 'GeoShape':
123-
mcf['spatial']['datatype'] = 'vector'
124-
mcf['spatial']['geomtype'] = 'polygon'
125-
bt = geo['box'].split()
126-
bbox = bt[1], bt[0], bt[3], bt[2]
127-
128-
mcf['identification']['extents']['spatial'].append({
129-
'bbox': bbox,
130-
'crs': crs
131-
})
116+
mcf['spatial'] = mcf.get('spatial',{})
117+
md['spatial'] = md.get('spatial', md.get('spatialCoverage'))
118+
geo = self.get_first(self.get_first(md,'spatial',{}),'geo')
119+
bbox = None
120+
if geo and '@type' in geo.keys():
121+
if geo['@type'] == 'GeoCoordinates':
122+
mcf['spatial']['datatype'] = 'vector'
123+
mcf['spatial']['geomtype'] = 'point'
124+
bbox = [geo['longitude'], geo['latitude'],
125+
geo['longitude'], geo['latitude']]
126+
elif geo['@type'] == 'GeoShape':
127+
mcf['spatial']['datatype'] = 'vector'
128+
mcf['spatial']['geomtype'] = 'polygon'
129+
bt = geo['box'].replace(' ',',').split()
130+
if len(bt) == 4:
131+
bbox = bt[1], bt[0], bt[3], bt[2]
132+
if bbox:
133+
mcf['identification']['extents']['spatial'].append({
134+
'bbox': bbox,
135+
'crs': crs
136+
})
132137

133138
if 'temporalCoverage' in md:
134-
begin, end = md['temporalCoverage'].split('/')
139+
begin, end = self.get_first(md,'temporalCoverage','/').split('/')
135140
mcf['identification']['extents']['temporal'] = [{
136141
'begin': begin,
137142
'end': end
138143
}]
139144

140145
mcf['identification']['language'] = mcf['metadata']['language']
141-
mcf['identification']['title'] = md['name']
142-
mcf['identification']['abstract'] = md['description']
146+
mcf['identification']['title'] = self.get_first(md,'name')
147+
mcf['identification']['abstract'] = self.get_first(md,'description')
143148

144149
if 'dateCreated' in md:
145-
mcf['metadata']['identification']['creation'] = md['datePublished']
150+
mcf['identification']['creation'] = self.get_first(md,'datePublished') # noqa
146151
if 'datePublished' in md:
147-
mcf['metadata']['identification']['publication'] = md['datePublished'] # noqa
152+
mcf['identification']['publication'] = self.get_first(md,'datePublished') # noqa
148153
if 'dateModified' in md:
149-
mcf['metadata']['identification']['revision'] = md['dateModified']
154+
mcf['identification']['revision'] = self.get_first(md,'dateModified') # noqa
150155

151156
if 'version' in md:
152-
mcf['metadata']['identification']['edition'] = md['version']
157+
mcf['identification']['edition'] = self.get_first(md,'version')
153158

154159
mcf['identification']['keywords'] = {
155160
'default': {
@@ -159,32 +164,33 @@ def import_(self, metadata: str) -> dict:
159164

160165
for dist in md['distribution']:
161166
mcf['distribution'][dist['name']] = {
162-
'name': dist['name'],
163-
'type': dist['encodingFormat'],
164-
'url': dist['contentUrl'],
167+
'name': self.get_first(dist,'name'),
168+
'type': self.get_first(dist,'encodingFormat'),
169+
'url': self.get_first(dist,'contentUrl'),
165170
'rel': 'download',
166171
'function': 'download'
167172
}
168173

169174
for ct in ['author', 'publisher', 'creator', 'provider', 'funder']:
170175
if ct in md:
176+
ct2 = md[ct]
171177
contact = {}
172178
contact['url'] = md[ct]['url']
173-
contact['individualname'] = md[ct]['name']
179+
contact['individualname'] = self.get_first(ct,'name')
174180
if md[ct]['@type'] == 'Organization':
175-
contact['organization'] = md[ct]['name']
181+
contact['organization'] = self.get_first(ct,'name')
176182

177183
if 'address' in md[ct]:
178-
contact['address'] = md[ct]['streetAddress']
179-
contact['city'] = md[ct]['addressLocality']
180-
contact['administrativearea'] = md[ct]['addressRegion']
181-
contact['postalcode'] = md[ct]['postalCode']
182-
contact['country'] = md[ct]['addressCountry']
184+
contact['address'] = self.get_first(ct,'streetAddress')
185+
contact['city'] = self.get_first(ct,'addressLocality')
186+
contact['administrativearea'] = self.get_first(ct,'addressRegion')
187+
contact['postalcode'] = self.get_first(ct,'postalCode')
188+
contact['country'] = self.get_first(ct,'addressCountry')
183189

184190
if 'contactPoint' in md[ct]:
185-
cp = md[ct][0]
186-
contact['email'] = cp['email']
187-
contact['fax'] = cp['fax']
191+
cp = self.get_first(ct,'contactPoint')
192+
contact['email'] = self.get_first(cp,'email')
193+
contact['fax'] = self.get_first(cp,'fax')
188194

189195
mcf['contact'][ct] = contact
190196

@@ -488,3 +494,37 @@ def generate_link(self, distribution: dict) -> dict:
488494
link['name'] = name[0]
489495

490496
return link
497+
498+
def get_first(self, obj, key, default=None):
    """
    returns first element of a list else return element

    :param obj: `dict` (or other mapping) to read from; any other
                type (e.g. `None` or `str`) yields `default`
    :param key: key to look up in `obj`
    :param default: value returned when `key` is missing, when its
                    value is falsy, or when `obj` is not a mapping

    :returns: first element (str, num or dict)
    """

    from collections.abc import Mapping

    # lookups are chained by callers (the result of one get_first() is
    # fed into the next), so obj may be None or a plain string here
    if not isinstance(obj, Mapping):
        return default

    if key not in obj:
        return default

    value = obj[key]

    # falsy values (None, '', 0, empty list) all fall back to default
    if not value:
        return default

    # a non-empty list is guaranteed at this point
    if isinstance(value, list):
        return value[0]

    return value
515+
516+
517+
def get_all(self, obj, key, default=None):
    """
    return list of elements

    :param obj: `dict` (or other mapping) to read from; any other
                type (e.g. `None` or `str`) yields `default`
    :param key: key to look up in `obj`
    :param default: value returned when `key` is missing, when its
                    value is falsy, or when `obj` is not a mapping;
                    a new empty list when not given

    :returns: list of elements
    """

    from collections.abc import Mapping

    # build the fallback per call: a `[]` default in the signature
    # would be one shared list that callers could mutate
    if default is None:
        default = []

    if not isinstance(obj, Mapping):
        return default

    # look up the `key` argument (not the literal string 'key')
    if key not in obj:
        return default

    value = obj[key]

    if not value:
        return default

    if isinstance(value, list):
        return value

    # wrap a single value so callers can always iterate
    return [value]

0 commit comments

Comments
 (0)