Skip to content

Commit 3046eb0

Browse files
mjziebarthrth
authored andcommitted
FIX Geographic coordinates (#116)
1 parent db07202 commit 3046eb0

File tree

4 files changed

+148
-30
lines changed

4 files changed

+148
-30
lines changed

pykrige/ok.py

Lines changed: 51 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -459,9 +459,14 @@ def print_statistics(self):
459459
def _get_kriging_matrix(self, n):
460460
"""Assembles the kriging matrix."""
461461

462-
xy = np.concatenate((self.X_ADJUSTED[:, np.newaxis],
463-
self.Y_ADJUSTED[:, np.newaxis]), axis=1)
464-
d = cdist(xy, xy, 'euclidean')
462+
if self.coordinates_type == 'euclidean':
463+
xy = np.concatenate((self.X_ADJUSTED[:, np.newaxis],
464+
self.Y_ADJUSTED[:, np.newaxis]), axis=1)
465+
d = cdist(xy, xy, 'euclidean')
466+
elif self.coordinates_type == 'geographic':
467+
d = core.great_circle_distance(self.X_ADJUSTED[:,np.newaxis],
468+
self.Y_ADJUSTED[:,np.newaxis],
469+
self.X_ADJUSTED, self.Y_ADJUSTED)
465470
a = np.zeros((n+1, n+1))
466471
a[:n, :n] = - self.variogram_function(self.variogram_model_parameters,
467472
d)
@@ -660,6 +665,11 @@ def execute(self, style, xpoints, ypoints, mask=None, backend='vectorized',
660665
raise ValueError("style argument must be 'grid', "
661666
"'points', or 'masked'")
662667

668+
if n_closest_points is not None and n_closest_points <= 1:
669+
# If this is not checked, nondescriptive errors emerge
670+
# later in the code.
671+
raise ValueError("n_closest_points has to be at least two!")
672+
663673
xpts = np.atleast_1d(np.squeeze(np.array(xpoints, copy=True)))
664674
ypts = np.atleast_1d(np.squeeze(np.array(ypoints, copy=True)))
665675
n = self.X_ADJUSTED.shape[0]
@@ -699,28 +709,16 @@ def execute(self, style, xpoints, ypoints, mask=None, backend='vectorized',
699709
[self.XCENTER, self.YCENTER],
700710
[self.anisotropy_scaling],
701711
[self.anisotropy_angle]).T
712+
# Prepare for cdist:
702713
xy_data = np.concatenate((self.X_ADJUSTED[:, np.newaxis],
703714
self.Y_ADJUSTED[:, np.newaxis]), axis=1)
704715
xy_points = np.concatenate((xpts[:, np.newaxis],
705716
ypts[:, np.newaxis]), axis=1)
706717
elif self.coordinates_type == 'geographic':
707-
# Quick version: Only difference between euclidean and spherical
708-
# space regarding kriging is the distance metric.
709-
# Since the relationship between three dimensional euclidean
710-
# distance and great circle distance on the sphere is monotonous,
711-
# use the existing (euclidean) infrastructure for nearest neighbour
712-
# search and distance calculation in euclidean three space and
713-
# convert distances to great circle distances afterwards.
714-
lon_d = self.X_ADJUSTED[:, np.newaxis] * np.pi / 180.0
715-
lat_d = self.Y_ADJUSTED[:, np.newaxis] * np.pi / 180.0
716-
xy_data = np.concatenate((np.cos(lon_d) * np.cos(lat_d),
717-
np.sin(lon_d) * np.cos(lat_d),
718-
np.sin(lat_d)), axis=1)
719-
lon_p = xpts[:, np.newaxis] * np.pi / 180.0
720-
lat_p = ypts[:, np.newaxis] * np.pi / 180.0
721-
xy_points = np.concatenate((np.cos(lon_p) * np.cos(lat_p),
722-
np.sin(lon_p) * np.cos(lat_p),
723-
np.sin(lat_p)), axis=1)
718+
# In spherical coordinates, we do not correct for anisotropy.
719+
# Also, we don't use scipy.spatial.cdist, so we do not have to
720+
# format the input data accordingly.
721+
pass
724722

725723
if style != 'masked':
726724
mask = np.zeros(npt, dtype='bool')
@@ -741,13 +739,36 @@ def execute(self, style, xpoints, ypoints, mask=None, backend='vectorized',
741739
c_pars = {key: getattr(self, key) for key in ['Z', 'eps', 'variogram_model_parameters', 'variogram_function']}
742740

743741
if n_closest_points is not None:
742+
if self.coordinates_type == 'geographic':
743+
# To make use of the KDTree, we have to convert the
744+
# spherical coordinates into three dimensional Euclidean
745+
# coordinates, since the standard KDTree cannot handle
746+
# the periodicity.
747+
# Do the conversion just for the step involving the KDTree:
748+
lon_d = self.X_ADJUSTED[:, np.newaxis] * np.pi / 180.0
749+
lat_d = self.Y_ADJUSTED[:, np.newaxis] * np.pi / 180.0
750+
xy_data = np.concatenate((np.cos(lon_d) * np.cos(lat_d),
751+
np.sin(lon_d) * np.cos(lat_d),
752+
np.sin(lat_d)), axis=1)
753+
lon_p = xpts[:, np.newaxis] * np.pi / 180.0
754+
lat_p = ypts[:, np.newaxis] * np.pi / 180.0
755+
xy_points = np.concatenate((np.cos(lon_p) * np.cos(lat_p),
756+
np.sin(lon_p) * np.cos(lat_p),
757+
np.sin(lat_p)), axis=1)
758+
744759
from scipy.spatial import cKDTree
745760
tree = cKDTree(xy_data)
746761
bd, bd_idx = tree.query(xy_points, k=n_closest_points, eps=0.0)
762+
747763
if self.coordinates_type == 'geographic':
748-
# Convert euclidean distances to great circle distances:
749-
bd = core.euclid3_to_great_circle(bd)
750-
764+
# Between the nearest neighbours from Euclidean search,
765+
# calculate the great circle distance using the standard method:
766+
x_points = np.tile(xpts[:,np.newaxis],(1,n_closest_points))
767+
y_points = np.tile(ypts[:,np.newaxis],(1,n_closest_points))
768+
bd = core.great_circle_distance(x_points, y_points,
769+
self.X_ADJUSTED[bd_idx],
770+
self.Y_ADJUSTED[bd_idx])
771+
751772
if backend == 'loop':
752773
zvalues, sigmasq = \
753774
self._exec_loop_moving_window(a, bd, mask, bd_idx)
@@ -760,11 +781,13 @@ def execute(self, style, xpoints, ypoints, mask=None, backend='vectorized',
760781
raise ValueError('Specified backend {} for a moving window '
761782
'is not supported.'.format(backend))
762783
else:
763-
bd = cdist(xy_points, xy_data, 'euclidean')
764-
if self.coordinates_type == 'geographic':
765-
# Convert euclidean distances to great circle distances:
766-
bd = core.euclid3_to_great_circle(bd)
767-
784+
if self.coordinates_type == 'euclidean':
785+
bd = cdist(xy_points, xy_data, 'euclidean')
786+
elif self.coordinates_type == 'geographic':
787+
bd = core.great_circle_distance(xpts[:,np.newaxis],
788+
ypts[:,np.newaxis],
789+
self.X_ADJUSTED, self.Y_ADJUSTED)
790+
768791
if backend == 'vectorized':
769792
zvalues, sigmasq = self._exec_vector(a, bd, mask)
770793
elif backend == 'loop':

pykrige/tests/test_core.py

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import pytest
1515
from pytest import approx
1616
from numpy.testing import assert_allclose
17+
from scipy.spatial.distance import cdist
1718

1819
from pykrige import kriging_tools as kt
1920
from pykrige import core
@@ -1996,7 +1997,7 @@ def test_geometric_code():
19961997
rtol=1e-5)
19971998

19981999

1999-
def test_ok_geometric():
2000+
def test_ok_geographic():
20002001
# Generate random data:
20012002
np.random.seed(89239413)
20022003
lon = 360.0*np.random.rand(50, 1)
@@ -2013,3 +2014,87 @@ def test_ok_geometric():
20132014

20142015
# Execute on grid:
20152016
z, ss = OK.execute('grid', grid_lon, grid_lat)
2017+
2018+
2019+
def test_ok_geographic_vs_euclid():
2020+
# Generate some random data close to the north pole.
2021+
# Then we use a polar projected 2d euclidean coordinate
2022+
# system and compare the kriging results in that coordinate
2023+
# system with the geographic-option results.
2024+
# If data point distance to the north pole is small enough
2025+
# (choose maximum 0.01 degrees), the differences due to curvature
2026+
# should be negligible.
2027+
np.random.seed(89239413)
2028+
from_north = 1e-2*np.random.random(5)
2029+
lat = 90.0-from_north
2030+
lon = 360.0*np.random.random(5)
2031+
z = np.random.random(5)
2032+
z -= z.mean()
2033+
x = from_north * np.cos(np.deg2rad(lon))
2034+
y = from_north * np.sin(np.deg2rad(lon))
2035+
2036+
# Generate grids:
2037+
grid_lon = 360.0 * np.linspace(0, 1, 50)
2038+
grid_from_north = np.linspace(0,0.01,10)
2039+
grid_lat = 90.0 - grid_from_north
2040+
grid_x = grid_from_north[:,np.newaxis] * np.cos(np.deg2rad(grid_lon[np.newaxis,:]))
2041+
grid_y = grid_from_north[:,np.newaxis] * np.sin(np.deg2rad(grid_lon[np.newaxis,:]))
2042+
grid_lon, grid_lat = np.meshgrid(grid_lon, grid_lat, indexing='xy')
2043+
2044+
# Flatten the grids:
2045+
grid_x = grid_x.flatten()
2046+
grid_y = grid_y.flatten()
2047+
grid_lon = grid_lon.flatten()
2048+
grid_lat = grid_lat.flatten()
2049+
2050+
# Calculate and compare distance matrices ensuring that that part
2051+
# of the workflow works as intended (tested: 2e-9 is currently the
2052+
# match for this setup):
2053+
d_eucl = cdist(np.concatenate([x[:,np.newaxis],y[:,np.newaxis]],axis=1),
2054+
np.concatenate([grid_x[:,np.newaxis],grid_y[:,np.newaxis]],axis=1))
2055+
d_geo = core.great_circle_distance(lon[:,np.newaxis], lat[:,np.newaxis],
2056+
grid_lon[np.newaxis,:], grid_lat[np.newaxis,:])
2057+
assert_allclose(d_eucl,d_geo, rtol=2e-9)
2058+
2059+
# Create ordinary kriging objects:
2060+
OK_geo = OrdinaryKriging(lon, lat, z, variogram_model='linear', verbose=False,
2061+
enable_plotting=False, coordinates_type='geographic')
2062+
OK_xy = OrdinaryKriging(x, y, z, variogram_model='linear', verbose=False,
2063+
enable_plotting=False)
2064+
OK_wrong = OrdinaryKriging(lon, lat, z, variogram_model='linear', verbose=False,
2065+
enable_plotting=False)
2066+
2067+
# Execute on grid:
2068+
zgeo, ss = OK_geo.execute('points', grid_lon, grid_lat)
2069+
zxy, ss = OK_xy.execute('points', grid_x, grid_y)
2070+
zwrong, ss = OK_wrong.execute('points', grid_lon, grid_lat)
2071+
2072+
# Assert equivalence / difference (tested: 2e-5 is currently the
2073+
# match for this setup):
2074+
assert_allclose(zgeo, zxy, rtol=2e-5)
2075+
assert not np.any(zgeo == 0)
2076+
assert np.abs((zgeo-zwrong)/zgeo).max() > 1.0
2077+
2078+
2079+
def test_ok_geometric_closest_points():
2080+
# Generate random data:
2081+
np.random.seed(89239413)
2082+
lon = 360.0*np.random.rand(50, 1)
2083+
lat = 180.0*np.random.rand(50, 1) - 90.0
2084+
z = np.random.rand(50, 1)
2085+
2086+
# Generate grid:
2087+
grid_lon = 360.0*np.random.rand(120, 1)
2088+
grid_lat = 180.0*np.random.rand(120, 1) - 90.0
2089+
2090+
# Create ordinary kriging object:
2091+
OK = OrdinaryKriging(lon, lat, z, variogram_model='linear', verbose=False,
2092+
enable_plotting=False, coordinates_type='geographic')
2093+
2094+
# Execute on grid:
2095+
with pytest.raises(ValueError):
2096+
# Test OK raising ValueError when closest_points == 1:
2097+
z, ss = OK.execute('grid', grid_lon, grid_lat, n_closest_points=1,
2098+
backend='C')
2099+
z, ss = OK.execute('grid', grid_lon, grid_lat, n_closest_points=5,
2100+
backend='C')

release_notes.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,15 @@
11
# PyKrige Changelog
22

3+
## Version 1.4.1
4+
*??? Date *
5+
6+
### New features
7+
* Added method to obtain variogram model points. PR[#94](https://github.com/bsmurphy/PyKrige/pull/94) by [Daniel Mejía Raigosa](https://github.com/Daniel-M)
8+
9+
### Bug fixes
10+
* Fixed OrdinaryKriging readme example. PR[#107](https://github.com/bsmurphy/PyKrige/pull/107) by [Harry Matchette-Downes](https://github.com/harrymd)
11+
* Fixed kriging matrix not being calculated correctly for geographic coordinates. PR[99](https://github.com/bsmurphy/PyKrige/pull/99) by [Mike Rilee](https://github.com/michaelleerilee)
12+
313
## Version 1.4
414
*April 24, 2018*
515

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
ext_errors = (CCompilerError, DistutilsExecError, DistutilsPlatformError)
1515

1616
NAME = 'PyKrige'
17-
VERSION = '1.4.0'
17+
VERSION = '1.4.1'
1818
AUTHOR = 'Benjamin S. Murphy'
1919
2020
URL = 'https://github.com/bsmurphy/PyKrige'

0 commit comments

Comments
 (0)