Skip to content

Commit 43e475c

Browse files
authored
Merge pull request #11 from ClimateImpactLab/feature/update-for-ice-sheet
update data acquisition notebook
2 parents a150234 + c6300c6 commit 43e475c

File tree

3 files changed

+90
-86
lines changed

3 files changed

+90
-86
lines changed

HISTORY.rst

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
History
22
=======
33

4-
unreleased
5-
----------
4+
v1.2.1
5+
------
66
* Update GitHub actions versions
7+
* Update data acquisition notebook, using helper functions to download input data from
8+
Zenodo
79

810
v1.2.0
911
------

notebooks/data-acquisition.ipynb

+9-84
Original file line numberDiff line numberDiff line change
@@ -39,18 +39,17 @@
3939
},
4040
"outputs": [],
4141
"source": [
42-
"import tempfile\n",
43-
"from io import BytesIO\n",
44-
"from os import environ\n",
4542
"from pathlib import Path\n",
46-
"from zipfile import ZipFile\n",
4743
"\n",
48-
"import numpy as np\n",
4944
"import pandas as pd\n",
5045
"import requests\n",
5146
"from cartopy.io import shapereader\n",
52-
"from fsspec import FSTimeoutError\n",
53-
"from fsspec.implementations.zip import ZipFileSystem\n",
47+
"from pyCIAM.io import (\n",
48+
" download_and_extract_from_zenodo,\n",
49+
" download_and_extract_partial_zip,\n",
50+
" get_zenodo_file_list,\n",
51+
")\n",
52+
"from pyCIAM.utils import copy\n",
5453
"from shared import (\n",
5554
" DIR_SHP,\n",
5655
" DIR_SLR_AR5_IFILES_RAW,\n",
@@ -71,9 +70,7 @@
7170
" PATH_SLR_HIST_TREND_MAP,\n",
7271
" PATHS_SURGE_LOOKUP,\n",
7372
" save,\n",
74-
")\n",
75-
"\n",
76-
"from pyCIAM.utils import copy"
73+
")"
7774
]
7875
},
7976
{
@@ -123,76 +120,6 @@
123120
"Z_URL_SLIIDERS_PC = Z_URL_RECORDS"
124121
]
125122
},
126-
{
127-
"cell_type": "code",
128-
"execution_count": 49,
129-
"id": "8d519f83-eb91-4cb0-b2e7-5918b91d5143",
130-
"metadata": {
131-
"tags": []
132-
},
133-
"outputs": [],
134-
"source": [
135-
"def get_download_link(files, prefix):\n",
136-
" links = [\n",
137-
" i[\"links\"]\n",
138-
" for i in files\n",
139-
" if i.get(\"filename\", \"\").startswith(prefix)\n",
140-
" or i.get(\"key\", \"\").startswith(prefix)\n",
141-
" ]\n",
142-
" assert len(links) == 1\n",
143-
" links = links[0]\n",
144-
" return links.get(\"download\", links[\"self\"])\n",
145-
"\n",
146-
"\n",
147-
"def download_and_extract_full_zip(lpath, url):\n",
148-
" if lpath.exists():\n",
149-
" return None\n",
150-
" lpath.parent.mkdir(exist_ok=True, parents=True)\n",
151-
"\n",
152-
" content = BytesIO(requests.get(url, params=PARAMS).content)\n",
153-
" if isinstance(lpath, Path):\n",
154-
" with ZipFile(content, \"r\") as zip_ref:\n",
155-
" zip_ref.extractall(lpath)\n",
156-
" else:\n",
157-
" with tempfile.TemporaryDirectory() as tmpdir:\n",
158-
" with ZipFile(content, \"r\") as zip_ref:\n",
159-
" zip_ref.extractall(tmpdir)\n",
160-
" copy(Path(tmpdir), lpath)\n",
161-
"\n",
162-
"\n",
163-
"def download_and_extract_partial_zip(lpath, url, zip_glob, n_retries=5):\n",
164-
" lpath.mkdir(exist_ok=True, parents=True)\n",
165-
" z = ZipFileSystem(url)\n",
166-
" if isinstance(zip_glob, (list, set, tuple, np.ndarray)):\n",
167-
" files_remote = zip_glob\n",
168-
" else:\n",
169-
" files_remote = [p for p in z.glob(zip_glob) if not p.endswith(\"/\")]\n",
170-
" files_local = [lpath / Path(f).name for f in files_remote]\n",
171-
" for fr, fl in list(zip(files_remote, files_local)):\n",
172-
" if not fl.is_file():\n",
173-
" retries = 0\n",
174-
" while retries < n_retries:\n",
175-
" print(f\"...Downloading {fl.name} (attempt {retries+1}/{n_retries})\")\n",
176-
" try:\n",
177-
" data = z.cat_file(fr)\n",
178-
" break\n",
179-
" except FSTimeoutError:\n",
180-
" if retries < (n_retries - 1):\n",
181-
" retries += 1\n",
182-
" else:\n",
183-
" raise\n",
184-
" print(f\"...Writing {fl.name}\")\n",
185-
" fl.write_bytes(data)\n",
186-
"\n",
187-
"\n",
188-
"def download_and_extract_from_zenodo(lpath, files, prefix, zip_glob=None):\n",
189-
" dl = get_download_link(files, prefix)\n",
190-
" if zip_glob is None:\n",
191-
" return download_and_extract_full_zip(lpath, dl)\n",
192-
" else:\n",
193-
" return download_and_extract_partial_zip(lpath, dl, zip_glob)"
194-
]
195-
},
196123
{
197124
"cell_type": "code",
198125
"execution_count": 5,
@@ -202,9 +129,7 @@
202129
},
203130
"outputs": [],
204131
"source": [
205-
"pyciam_files = requests.get(\n",
206-
" Z_URL_SLIIDERS_PC.format(doi=Z_PYCIAM_DOI), params=PARAMS\n",
207-
").json()[\"files\"]"
132+
"pyciam_files = get_zenodo_file_list(Z_PYCIAM_DOI)"
208133
]
209134
},
210135
{
@@ -628,7 +553,7 @@
628553
"name": "python",
629554
"nbconvert_exporter": "python",
630555
"pygments_lexer": "ipython3",
631-
"version": "3.10.8"
556+
"version": "3.12.2"
632557
},
633558
"widgets": {
634559
"application/vnd.jupyter.widget-state+json": {

pyCIAM/io.py

+77
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,22 @@
88
* load_diaz_inputs
99
"""
1010

11+
import tempfile
12+
from collections.abc import Iterable
13+
from io import BytesIO
14+
from pathlib import Path
15+
from zipfile import ZipFile
16+
1117
import dask.array as da
1218
import numpy as np
1319
import pandas as pd
1420
import pint_xarray # noqa: F401
21+
import requests
1522
import xarray as xr
23+
from fsspec import FSTimeoutError
24+
from fsspec.implementations.zip import ZipFileSystem
1625

26+
from pyCIAM.utils import copy
1727
from pyCIAM.utils import spherical_nearest_neighbor as snn
1828

1929
from .utils import _s2d
@@ -783,3 +793,70 @@ def load_diaz_inputs(
783793

784794
inputs = inputs.drop_dims("rcp_pt")
785795
return inputs, slr
796+
797+
798+
def get_zenodo_file_list(doi, params=None):
    """Return the list of file-metadata dicts for a Zenodo record.

    Parameters
    ----------
    doi : str or int
        Zenodo record identifier, interpolated into the records API URL.
    params : dict, optional
        Query parameters forwarded to ``requests.get`` (e.g. an access
        token). Default is ``None`` (no parameters); a ``None`` default
        replaces the original mutable ``{}`` default, which is a shared
        object across calls.

    Returns
    -------
    list of dict
        The ``"files"`` entries of the record's JSON metadata.
    """
    # requests treats params=None the same as "no query parameters".
    return requests.get(f"https://zenodo.org/api/records/{doi}", params=params).json()[
        "files"
    ]
802+
803+
804+
def get_download_link(files, prefix):
    """Find the unique download URL for the file whose name starts with *prefix*.

    Parameters
    ----------
    files : iterable of dict
        Zenodo file-metadata entries; each may carry its name under
        ``"filename"`` (legacy API) or ``"key"`` (current API) and its URLs
        under ``"links"``.
    prefix : str
        Leading portion of the target file's name.

    Returns
    -------
    str
        The ``"download"`` URL when present, otherwise the ``"self"`` URL.
    """
    hits = []
    for entry in files:
        named = entry.get("filename", "")
        keyed = entry.get("key", "")
        if named.startswith(prefix) or keyed.startswith(prefix):
            hits.append(entry["links"])
    # Exactly one file must match the prefix.
    assert len(hits) == 1
    link_map = hits[0]
    # NOTE: the "self" link is looked up unconditionally (matching
    # dict.get's eager default evaluation), so a missing "self" key raises
    # even when "download" is available.
    fallback = link_map["self"]
    return link_map.get("download", fallback)
814+
815+
816+
def _download_and_extract_full_zip(lpath, url, params=None):
    """Download an entire zip archive from *url* and extract it to *lpath*.

    No-op when *lpath* already exists. When *lpath* is a local
    ``pathlib.Path`` the archive is extracted directly into it; otherwise
    (presumably a cloud-filesystem path object — confirm against callers)
    it is extracted to a temporary directory first and the tree is copied
    over with :func:`pyCIAM.utils.copy`.

    Parameters
    ----------
    lpath : Path or path-like
        Destination directory for the extracted contents.
    url : str
        Direct download URL of the zip archive.
    params : dict, optional
        Query parameters forwarded to ``requests.get``. Default ``None``
        (no parameters) replaces the original mutable ``{}`` default.
    """
    if lpath.exists():
        return None
    lpath.parent.mkdir(exist_ok=True, parents=True)

    # The whole archive is buffered in memory before extraction.
    content = BytesIO(requests.get(url, params=params).content)
    if isinstance(lpath, Path):
        with ZipFile(content, "r") as zip_ref:
            zip_ref.extractall(lpath)
    else:
        # Non-local destination: extract to a scratch dir, then copy over.
        with tempfile.TemporaryDirectory() as tmpdir:
            with ZipFile(content, "r") as zip_ref:
                zip_ref.extractall(tmpdir)
            copy(Path(tmpdir), lpath)
830+
831+
832+
def download_and_extract_partial_zip(lpath, url, zip_glob, n_retries=5):
    """Download selected members of a remote zip archive into *lpath*.

    Opens *url* as an fsspec ``ZipFileSystem`` (range requests, so only the
    requested members are fetched) and writes each matching member to
    ``lpath / <member basename>``, skipping files that already exist
    locally. Each member download is retried on ``FSTimeoutError``.

    Parameters
    ----------
    lpath : Path
        Local destination directory (created if necessary).
    url : str
        URL of the remote zip archive.
    zip_glob : str or sequence
        Either a glob pattern matched against archive member paths, or an
        explicit collection (list/set/tuple/ndarray) of member paths.
    n_retries : int, optional
        Maximum download attempts per member (must be >= 1). The timeout on
        the final attempt is re-raised.
    """
    lpath.mkdir(exist_ok=True, parents=True)
    z = ZipFileSystem(url)
    if isinstance(zip_glob, (list, set, tuple, np.ndarray)):
        files_remote = zip_glob
    else:
        # Drop directory entries (paths ending in "/") from the glob hits.
        files_remote = [p for p in z.glob(zip_glob) if not p.endswith("/")]
    for fr in files_remote:
        fl = lpath / Path(fr).name
        if fl.is_file():
            continue  # already downloaded
        for attempt in range(n_retries):
            print(f"...Downloading {fl.name} (attempt {attempt+1}/{n_retries})")
            try:
                data = z.cat_file(fr)
                break
            except FSTimeoutError:
                if attempt == n_retries - 1:
                    raise  # out of retries; surface the timeout
        print(f"...Writing {fl.name}")
        fl.write_bytes(data)
855+
856+
857+
def download_and_extract_from_zenodo(lpath, files, prefix, zip_glob=None, params=None):
    """Download a Zenodo-hosted zip (fully or partially) into *lpath*.

    Resolves the download URL for the unique file in *files* whose name
    starts with *prefix*, then either downloads and extracts the whole
    archive (``zip_glob is None``) or only the members matching
    ``zip_glob``.

    Parameters
    ----------
    lpath : Path or path-like
        Local destination for the extracted data.
    files : iterable of dict
        Zenodo file-metadata entries (e.g. from ``get_zenodo_file_list``).
    prefix : str
        Leading portion of the target file's name.
    zip_glob : str or sequence, optional
        Member pattern/list for a partial download; ``None`` downloads the
        full archive.
    params : dict, optional
        Query parameters (e.g. an access token) forwarded to the full-zip
        download request. Previously the params accepted by the full-zip
        helper could not be supplied through this wrapper.
    """
    dl = get_download_link(files, prefix)
    if zip_glob is None:
        return _download_and_extract_full_zip(lpath, dl, params=params)
    else:
        return download_and_extract_partial_zip(lpath, dl, zip_glob)

0 commit comments

Comments
 (0)