Skip to content

Commit 399cf81

Browse files
committed
Add support for integrity header geopython#1998 (#15)
* Add support for data integrity header * Support MDN headers * Align Digest with IETF spec Implements testing for https://datatracker.ietf.org/doc/rfc9530/ * Include changes from geopython#1952 * Update main.yml * Fix get_choice_from_headers implementation * Fix flake8 * Update __init__.py * Update __init__.py
1 parent 0439f8e commit 399cf81

File tree

6 files changed

+158
-8
lines changed

6 files changed

+158
-8
lines changed

.github/workflows/main.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@ on:
66
- '**.md'
77
- 'docs/**'
88
pull_request:
9-
branches:
10-
- master
119
paths-ignore:
1210
- '!**.md'
1311
- 'docs/**'

pygeoapi/api/__init__.py

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
from datetime import datetime
4646
from functools import partial
4747
from gzip import compress
48+
import hashlib
4849
from http import HTTPStatus
4950
import logging
5051
import re
@@ -100,6 +101,10 @@
100101
(F_NETCDF, 'application/x-netcdf'),
101102
))
102103

104+
#: Digest integrity methods supported
105+
DIGEST_METHODS = ['sha1', 'sha256', 'sha384', 'sha512',
106+
'sha3-256', 'sha3-384', 'sha3-512']
107+
103108
#: Locale used for system responses (e.g. exceptions)
104109
SYSTEM_LOCALE = l10n.Locale('en', 'US')
105110

@@ -162,11 +167,46 @@ def apply_gzip(headers: dict, content: Union[str, bytes]) -> Union[str, bytes]:
162167
f"{headers['Content-Type']}; charset={charset}"
163168
content = compress(content.encode(charset))
164169
except TypeError as err:
165-
headers.pop('Content-Encoding')
166170
LOGGER.error(f'Error in compression: {err}')
171+
172+
if 'Content-Encoding' in headers:
173+
headers.pop('Content-Encoding')
174+
elif 'content-encoding' in headers:
175+
headers.pop('content-encoding')
176+
167177
return content
168178

169179

180+
def apply_integrity(headers: dict, content: Union[str, bytes]) -> None:
    """
    Apply a content integrity hash to the response headers.

    Reads the digest method from the `Want-Content-Digest` entry of
    `headers`, hashes `content` with it and stores the result under
    `Content-Digest`. The `Want-Content-Digest` entry is then removed
    from `headers`. Nothing is changed when no digest method is found.

    NOTE(review): the value is written as `<method>=<hex digest>`, while
    RFC 9530 describes `Content-Digest` values as base64 byte sequences
    keyed by names such as `sha-256` -- confirm the intended wire format.
    NOTE(review): the web framework adapters call this before
    `apply_gzip`, so the digest covers the uncompressed payload --
    confirm this is intended.

    :param headers: dict of response headers (modified in place)
    :param content: response payload; a `str` is encoded with the first
                    entry of `CHARSET` before hashing

    :raises ValueError: if `hashlib` does not support the hash method

    :returns: `None`
    """

    hash_method = get_choice_from_headers(headers, 'want-content-digest')

    if hash_method is None:
        LOGGER.debug('No digest requested')
        return

    LOGGER.debug(f'Hashing with {hash_method}')

    # Only the constructor lookup is guarded: encoding errors raised
    # further down are ValueError subclasses too and must not be
    # reported as an unsupported hash method.
    try:
        hash_func = hashlib.new(hash_method)
    except ValueError:
        # hashlib's guaranteed names use underscores (e.g. 'sha3_256');
        # the hyphenated spelling only resolves through OpenSSL
        try:
            hash_func = hashlib.new(hash_method.replace('-', '_'))
        except ValueError as err:
            raise ValueError(
                f'Unsupported hash method: {hash_method}') from err

    content_bytes = (content if isinstance(content, bytes)
                     else content.encode(CHARSET[0]))

    hash_func.update(content_bytes)
    headers['Content-Digest'] = f'{hash_method}={hash_func.hexdigest()}'

    # Drop the request-side header whatever casing it was stored with
    for key in [k for k in headers if k.lower() == 'want-content-digest']:
        headers.pop(key)
208+
209+
170210
class APIRequest:
171211
"""
172212
Transforms an incoming server-specific Request into an object
@@ -237,6 +277,9 @@ def __init__(self, request, supported_locales):
237277
# Determine format
238278
self._format = self._get_format(request.headers)
239279

280+
# Determine digest
281+
self._digest = self._get_digest(request.headers)
282+
240283
# Get received headers
241284
self._headers = self.get_request_headers(request.headers)
242285

@@ -351,6 +394,23 @@ def _get_format(self, headers) -> Union[str, None]:
351394
idx_ = mimes.index(type_)
352395
return fmts[idx_]
353396

397+
def _get_digest(self, headers) -> Union[str, None]:
    """
    Pick the digest method requested via the `Want-Content-Digest`
    request header.

    :param headers: Dict of Request headers
    :returns: the first entry returned by `get_choice_from_headers`
              that, once lower-cased, is listed in `DIGEST_METHODS`;
              None if the header yields nothing or no entry matches
    """
    requested = get_choice_from_headers(headers, 'want-content-digest',
                                        all=True)
    if requested is None:
        return None

    # Matching is case-insensitive: compare lower-cased names
    lowered = (name.lower() for name in requested)
    return next((name for name in lowered if name in DIGEST_METHODS),
                None)
413+
354414
@property
355415
def data(self) -> bytes:
356416
"""Returns the additional data send with the Request (bytes)"""
@@ -467,6 +527,7 @@ def is_valid(self, additional_formats=None) -> bool:
467527
def get_response_headers(self, force_lang: l10n.Locale = None,
468528
force_type: str = None,
469529
force_encoding: str = None,
530+
force_digest: str = None,
470531
**custom_headers) -> dict:
471532
"""
472533
Prepares and returns a dictionary with Response object headers.
@@ -491,6 +552,7 @@ def get_response_headers(self, force_lang: l10n.Locale = None,
491552
:param force_lang: An optional Content-Language header override.
492553
:param force_type: An optional Content-Type header override.
493554
:param force_encoding: An optional Content-Encoding header override.
555+
:param force_digest: An optional Want-Content-Digest header override.
494556
:returns: A header dict
495557
"""
496558

@@ -510,6 +572,11 @@ def get_response_headers(self, force_lang: l10n.Locale = None,
510572
elif F_GZIP in get_from_headers(self._headers, 'accept-encoding'):
511573
headers['Content-Encoding'] = F_GZIP
512574

575+
if force_digest:
576+
headers['Want-Content-Digest'] = force_digest
577+
elif self._digest:
578+
headers['Want-Content-Digest'] = self._digest
579+
513580
return headers
514581

515582
def get_request_headers(self, headers) -> dict:
@@ -522,7 +589,7 @@ def get_request_headers(self, headers) -> dict:
522589
:returns: A header dict
523590
"""
524591

525-
headers_ = {item[0]: item[1] for item in headers.items()}
592+
headers_ = {item[0].title(): item[1] for item in headers.items()}
526593
return headers_
527594

528595

pygeoapi/django_/views.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
from django.conf import settings
4141
from django.http import HttpRequest, HttpResponse
4242

43-
from pygeoapi.api import API, APIRequest, apply_gzip
43+
from pygeoapi.api import API, APIRequest, apply_gzip, apply_integrity
4444
import pygeoapi.api as core_api
4545
import pygeoapi.api.coverages as coverages_api
4646
import pygeoapi.api.environmental_data_retrieval as edr_api
@@ -550,6 +550,7 @@ def execute_from_django(api_function, request: HttpRequest, *args,
550550
else:
551551

552552
headers, status, content = api_function(api_, api_request, *args)
553+
apply_integrity(headers, content)
553554
content = apply_gzip(headers, content)
554555

555556
# Convert API payload to a django response

pygeoapi/flask_app.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
from flask import (Flask, Blueprint, make_response, request,
3838
send_from_directory, Response, Request)
3939

40-
from pygeoapi.api import API, APIRequest, apply_gzip
40+
from pygeoapi.api import API, APIRequest, apply_gzip, apply_integrity
4141
import pygeoapi.api as core_api
4242
import pygeoapi.api.coverages as coverages_api
4343
import pygeoapi.api.environmental_data_retrieval as edr_api
@@ -151,6 +151,7 @@ def execute_from_flask(api_function, request: Request, *args,
151151
headers, status, content = actual_api.get_format_exception(api_request)
152152
else:
153153
headers, status, content = api_function(actual_api, api_request, *args)
154+
apply_integrity(headers, content)
154155
content = apply_gzip(headers, content)
155156

156157
response = make_response(content, status)

pygeoapi/starlette_app.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
)
5050
import uvicorn
5151

52-
from pygeoapi.api import API, APIRequest, apply_gzip
52+
from pygeoapi.api import API, APIRequest, apply_gzip, apply_integrity
5353
import pygeoapi.api as core_api
5454
import pygeoapi.api.coverages as coverages_api
5555
import pygeoapi.api.environmental_data_retrieval as edr_api
@@ -133,6 +133,7 @@ async def execute_from_starlette(api_function, request: Request, *args,
133133
headers, status, content = await loop.run_in_executor(
134134
None, call_api_threadsafe, loop, api_function,
135135
actual_api, api_request, *args)
136+
apply_integrity(headers, content)
136137
# NOTE: that gzip currently doesn't work in starlette
137138
# https://github.com/geopython/pygeoapi/issues/1591
138139
content = apply_gzip(headers, content)

tests/api/test_itemtypes.py

Lines changed: 83 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
from shapely.geometry import Point
4242

4343
from pygeoapi.api import (API, FORMAT_TYPES, F_GZIP, F_HTML, F_JSONLD,
44-
apply_gzip)
44+
apply_gzip, apply_integrity)
4545
from pygeoapi.api.itemtypes import (
4646
get_collection_queryables, get_collection_item,
4747
get_collection_items, manage_collection_item)
@@ -415,6 +415,88 @@ def test_collection_items_gzip_csv(config, api_, openapi):
415415
assert rsp_csv == rsp_csv_
416416

417417

418+
def test_collection_no_digest(api_):
    """No `Content-Digest` is added without a supported digest request."""
    # First without the header at all, then with a header that only
    # names a method outside the supported list.
    for request_kwargs in ({}, {'HTTP_WANT_CONTENT_DIGEST': 'SHA100'}):
        req = mock_api_request(**request_kwargs)
        rsp_headers, _, rsp_content = get_collection_item(
            api_, req, 'obs', '371')
        apply_integrity(rsp_headers, rsp_content)

        assert rsp_headers['Content-Type'] == 'application/json'
        assert 'Content-Digest' not in rsp_headers
432+
433+
434+
def test_collection_with_digest(api_):
    """Each supported digest method yields the expected `Content-Digest`."""
    # (Want-Content-Digest request value, expected Content-Digest value)
    cases = (
        ('SHA100,sha1',
         'sha1=0d4818c86215ba031044b27e28cb3170936e8c53'),
        ('sha256',
         'sha256=f24c899027516b64c13734caf12a5506c8137f8520ab1b08b936e8e14f43faa4'),  # noqa
        ('sha384',
         'sha384=2e875167e36a9d70a11bef48d290dd439741514f28e19680a4eb049f2aeaca96092280dce1458c6072650a678840ee83'),  # noqa
        ('SHA512',
         'sha512=a57169dd6a947237df9ab8640cf6bedd57e54cb854cc8843f4aac08c30d4e2c402af8b637b8823f6953b90d61f8fc37db95a68cce9ee0d7b9cc9186fcbf5978a'),  # noqa
        ('sha3-256',
         'sha3-256=52bd7167f2c74131287e313dc0e6959502626a44069e6b3ab9059aa00cf15c22'),  # noqa
        ('sha3-384',
         'sha3-384=335b5d9c02c174325b8d9f039ca1acd6783d1d457d1105a091b31baeca023c5896665d5fd7417fbc7ee946231e7ba990'),  # noqa
        ('SHA3-512',
         'sha3-512=79f736ddfbc8faca1623c6eb365e48e422aa30d1ebb51cc5aa0b046b1966d8256f2cc1399d3669069d965f56a5148522d05e7d63b78b7b76282034f8e77fb8c2'),  # noqa
    )

    for wanted, expected_digest in cases:
        req = mock_api_request(HTTP_WANT_CONTENT_DIGEST=wanted)
        rsp_headers, _, rsp_content = get_collection_item(
            api_, req, 'obs', '371')
        apply_integrity(rsp_headers, rsp_content)

        assert rsp_headers['Content-Type'] == 'application/json'
        assert rsp_headers['Content-Digest'] == expected_digest
483+
484+
485+
def test_collection_with_digest_and_gzip(api_):
    """Digest and gzip headers are both present when both are requested."""
    req = mock_api_request(HTTP_WANT_CONTENT_DIGEST='SHA1,sha256',
                           HTTP_ACCEPT_ENCODING=F_GZIP)
    rsp_headers, _, rsp_content = get_collection_item(
        api_, req, 'obs', '371')

    # Same order as the web framework adapters: digest first, then gzip
    apply_integrity(rsp_headers, rsp_content)
    apply_gzip(rsp_headers, rsp_content)

    expected_headers = {
        'Content-Type': 'application/json; charset=utf-8',
        'Content-Digest': 'sha1=0d4818c86215ba031044b27e28cb3170936e8c53',
        'Content-Encoding': F_GZIP,
    }
    for header_name, header_value in expected_headers.items():
        assert rsp_headers[header_name] == header_value
498+
499+
418500
def test_get_collection_items_crs(config, api_):
419501

420502
# Invalid CRS query parameter

0 commit comments

Comments
 (0)