giocaizzi / py3dinterpolations

Home Page: https://giocaizzi.github.io/py3dinterpolations/
License: MIT License
# missing imports, assumed from the names used below
from unittest.mock import patch

from matplotlib.figure import Figure

# preprocessed GridData (the Preprocessor arguments are elided in the source)
preprocessed_gd = Preprocessor(gd).preprocess()
# patch the reverse_preprocessing function so that it returns
# a GridData object created from a sample of the test data
with patch(
    "py3dinterpolations.plotting.plotting.reverse_preprocessing",
    return_value=GridData(test_data.sample(n=50, random_state=42)),
):
    fig = plot_downsampling(gd, preprocessed_gd)
# assert the returned object is a Figure
assert isinstance(fig, Figure)
# assert the figure has one visible axes per unique ID
n = len(gd.data.index.get_level_values("ID").unique())
visible_axes = [ax for ax in fig.axes if ax.get_visible()]
assert len(visible_axes) == n
# TODO: assert each axes has 2 lines
import numpy as np

# TODO: make this a custom sklearn class
class DeterministicModel:
    """base class for all deterministic models"""

    # constructor inferred from the IDW subclass usage further below
    def __init__(self, x: np.ndarray, y: np.ndarray, z: np.ndarray, values: np.ndarray):
        self.x = x
        self.y = y
        self.z = z
        self.values = values


"""interpolator"""
from typing import Union, Tuple
import numpy as np
from ..core.griddata import GridData
from ..core import create_regulargrid3d_from_griddata
from .modeler import Modeler
from .preprocessor import Preprocessor
from .estimator import Estimator
def interpolate(
griddata: GridData,
model_name: str,
grid_resolution: float,
model_params: dict = {},
model_params_grid: dict = {},
preprocess_kwags: dict = {},
predict_kwags: dict = {},
return_model: bool = False,
) -> Union[np.ndarray, Tuple[np.ndarray, Modeler]]:
"""interpolate griddata
Interpolate griddata using a Modeler instance that wraps all supported
models. The model is selected with the argument `model_name`.
If the `model_params` is passed, then the model is initialized with those
parameters. Otherwise, to make a search for the best parameters, use the
`model_params_grid` argument.
    The 3D grid for interpolation is retrieved from the training data.
    At the moment, only a regular grid is supported.
If requested, the griddata is preprocessed using the Preprocessor class.
Args:
griddata (GridData): griddata to interpolate
model_name (str): model name
grid_resolution (float): grid resolution
        model_params (dict, optional): model parameters. Defaults to {}.
        model_params_grid (dict, optional): grid of model parameters to
            search for the best combination. Defaults to {}.
preprocess_kwags (dict, optional): preprocessing parameters. Defaults to {}.
predict_kwags (dict, optional): prediction parameters. Defaults to {}.
return_model (bool, optional): return model. Defaults to False.
Returns:
Union[np.ndarray, Tuple[np.ndarray, Modeler]]:
interpolated griddata, optionally with model
Raises:
ValueError: either model_params or model_params_grid must be passed
ValueError: model_params and model_params_grid cannot be passed together
NotImplementedError: only RegularGrid3D is supported.
NotImplementedError: Parameter search is only supported for ordinary_kriging
Examples:
>>> # interpolate griddata
        >>> interpolated = interpolate(
        ...     griddata,
        ...     model_name="ordinary_kriging",
        ...     grid_resolution=5,
        ...     model_params={
        ...         "variogram_model": "linear",
        ...         "nlags": 6,
        ...         "weight": True,
        ...     },
        ...     preprocess_kwags={
        ...         "downsampling_res": 0.1,
        ...         "normalize_xyz": True,
        ...         "standardize_v": True,
        ...     },
        ... )
"""
# check model_params and model_params_grid
if model_params == {} and model_params_grid == {}:
raise ValueError("either model_params or model_params_grid must be passed")
if model_params != {} and model_params_grid != {}:
raise ValueError("model_params and model_params_grid cannot be passed together")
# check grid_resolution is of supported type
    if isinstance(grid_resolution, (float, int)):
        # retrieve associated grid
grid3d = create_regulargrid3d_from_griddata(griddata, grid_resolution)
else:
raise NotImplementedError("only RegularGrid3D is supported.")
# preprocess griddata if needed
if preprocess_kwags != {}:
# preprocessor
preprocessor = Preprocessor(griddata, **preprocess_kwags)
# get new griddata
griddata = preprocessor.preprocess()
if model_params == {}:
# implemented only for ordinary_kriging
if model_name != "ordinary_kriging":
raise NotImplementedError(
"Parameter search is only supported for ordinary_kriging"
)
# estimate
est = Estimator(griddata, model_params_grid)
        # TODO: the Krige() wrapper adds a "method" key that the estimator
        # does not need; find a flexible way to handle this, probably by
        # using the scikit-learn wrapper Krige() directly
model_params = est.best_params
model_params.pop("method")
# init Modeler
model = Modeler(
griddata=griddata,
grid3d=grid3d,
model_name=model_name,
model_params=model_params,
)
# make predictions
predictions = model.predict(**predict_kwags)
# return model
if return_model:
return predictions, model
else:
return predictions
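A hedged usage sketch of the parameter-search path above: passing `model_params_grid` (and no `model_params`) routes through `Estimator`, which is only implemented for ordinary_kriging. The grid values below are illustrative, and the "method" key is an assumption based on the TODO about the scikit-learn style Krige() wrapper.

# minimal sketch, assuming `griddata` is an existing GridData object
interpolated, model = interpolate(
    griddata,
    model_name="ordinary_kriging",
    grid_resolution=5,
    model_params_grid={
        "method": ["ordinary3d"],  # assumed key, popped before the Modeler is built
        "variogram_model": ["linear", "spherical"],
        "nlags": [4, 6],
    },
    return_model=True,
)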
        # reshape from zxy to xyz
        # TODO: reshape_pykrige, pykrige should not require a reshape
        if self.model._model_name == "ordinary_kriging":
            # reshape pykrige output
            interpolated = _reshape_pykrige(interpolated)

Issue: when pickling the model, the mesh attribute of grid3d is empty, although it was calculated initially.
"""Inverse Distance Weighting (IDW) model"""
import numpy as np
import math
from .deterministic import DeterministicModel
class IDW(DeterministicModel):
"""Simple IDW Model object
    IDW: Inverse Distance Weighting
    Offers tweaking of the power parameter of the IDW model.
    Can be really slow for large datasets, due to the use of Python loops.
    TODO: remove the loops, use a vectorized approach
    Args:
        x (np.ndarray): x coordinates of the observed data points
        y (np.ndarray): y coordinates of the observed data points
        z (np.ndarray): z coordinates of the observed data points
        values (np.ndarray): observed values at the data points
        power (float): power parameter of the IDW model
Attributes:
power (float): power of the IDW model
distance_matrix (np.ndarray): distance matrix between
observations and data points
"""
def __init__(
self,
x: np.ndarray,
y: np.ndarray,
z: np.ndarray,
values: np.ndarray,
power: float = 1,
):
# initialize parent class
super().__init__(x=x, y=y, z=z, values=values)
# set power
self.power = power
def _compute_point(
self,
x: float,
y: float,
z: float,
power: float,
        threshold: float = 1e-10,
) -> float:
"""find value at a point
Loops through all the data points to estimate the value at a point
based on the Inverse Distance Weighting (IDW) method.
Might be really slow for large datasets.
This implementation however offers easy advanced parametrization with:
- n_points: number of points to use for the IDW estimation (TODO)
- max_distance: maximum distance to use for the IDW estimation (TODO)
"""
        numerator = 0
        denominator = 0
# distance in 3d
        for i in range(len(self.values)):
dist = math.sqrt(
(x - self.x[i]) * (x - self.x[i])
+ (y - self.y[i]) * (y - self.y[i])
+ (z - self.z[i]) * (z - self.z[i])
)
# If the point is really close to one of the data points,
# return the data point value to avoid singularities
# EXACT interpolations
if dist < threshold:
return self.values[i]
            numerator += self.values[i] / pow(dist, power)
            denominator += 1 / pow(dist, power)
# Return NaN if the denominator is zero
if denominator > 0:
            value = numerator / denominator
else:
value = np.nan
return value
def compute(
self, gridx: np.ndarray, gridy: np.ndarray, gridz: np.ndarray
) -> np.ndarray:
meshx, meshy, meshz = np.meshgrid(gridx, gridy, gridz, indexing="xy")
        # allocate an output array for the interpolated values
        new_mesh = np.zeros_like(meshx)
# Iterate over the indices of the arrays
for i in range(meshx.shape[0]):
for j in range(meshx.shape[1]):
for k in range(meshx.shape[2]):
                    # store the IDW estimate at this grid node
new_mesh[i, j, k] = self._compute_point(
x=meshx[i, j, k],
y=meshy[i, j, k],
z=meshz[i, j, k],
power=self.power,
)
return new_mesh
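The TODO in the class docstring asks for a vectorized approach. Below is a minimal NumPy sketch of the same estimate, not part of the package: `idw_vectorized` is a hypothetical helper that computes all pairwise distances at once and keeps the exact-interpolation snap below the threshold.

import numpy as np

def idw_vectorized(px, py, pz, values, qx, qy, qz, power=1.0, threshold=1e-10):
    """hypothetical vectorized IDW, not in py3dinterpolations"""
    # pairwise distances, shape (n_queries, n_observations)
    d = np.sqrt(
        (qx[:, None] - px[None, :]) ** 2
        + (qy[:, None] - py[None, :]) ** 2
        + (qz[:, None] - pz[None, :]) ** 2
    )
    with np.errstate(divide="ignore", invalid="ignore"):
        w = 1.0 / d**power  # inverse-distance weights
        est = (w @ values) / w.sum(axis=1)
    # exact interpolation: snap queries that coincide with an observation
    hit_q, hit_p = np.nonzero(d < threshold)
    est[hit_q] = values[hit_p]
    return est

Applied to the grid, this would replace the triple loop in `compute`: flatten the meshgrid arrays, call the helper once, and reshape the result back to the mesh shape.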
    else:
        pytest.skip("NOT IMPLEMENTED YET")
        # TODO: assert something if downsampling is not None

Issues and pull requests:
- run a full documentation build precheck on pull requests on main
- documentation build fails
- run all notebooks to:
- package released on PyPI is empty, contains only __init__.py
- Restructured interpolate
"""preprocessor module"""
import pandas as pd
from ..core import GridData
from typing import Union
class Preprocessor:
"""Preprocessor class
Preprocess data before interpolation. Preprocessing includes:
- Downsampling, by taking the mean of blocks of given resolution
- Normalization of X Y Z
- Standardization of V
By calling the preprocess method, the data is preprocessed and a new
GridData object is returned with the preprocessed data and the
preprocessing parameters set.
Args:
griddata (GridData): GridData object to preprocess
downsampling_res (Union[float, None]): resolution to downsample data
by taking the mean of blocks of given resolution. If None, no
downsampling is applied. Default is None.
normalize_xyz (bool): whether to normalize X Y Z. Default is True.
standardize_v (bool): whether to standardize V. Default is True.
Attributes:
griddata (GridData): GridData object to preprocess
downsampling_res (Union[float, None]): resolution to downsample data
by taking the mean of blocks of given resolution. If None, no
downsampling is applied. Default is None.
normalize_xyz (bool): whether to normalize X Y Z. Default is True.
standardize_v (bool): whether to standardize V. Default is True.
preprocessor_params (dict): dictionary with the parameters of the
preprocessing. It is set after calling the preprocess method.::
preprocessor_params = {
"downsampling": {
"resolution": downsampling_res,
},
"normalization": {
"X": {
"min": min_value_of_X,
"max": max_value_of_X,
},
"Y": {
"min": min_value_of_Y,
"max": max_value_of_Y,
},
"Z": {
"min": min_value_of_Z,
"max": max_value_of_Z,
},
},
"standardization": {
"mean": mean_value_of_V,
"std": std_value_of_V,
},
}
Examples:
>>> # preprocess data
        >>> preprocessor = Preprocessor(
        ...     griddata,
        ...     downsampling_res=0.1,
        ...     normalize_xyz=True,
        ...     standardize_v=True,
        ... )
        >>> griddata = preprocessor.preprocess()
"""
    def __init__(
        self,
        griddata: GridData,
        downsampling_res: Union[float, None] = None,
        normalize_xyz: bool = True,
        standardize_v: bool = True,
    ):
        self.griddata = griddata
        self.downsampling_res = downsampling_res
        self.normalize_xyz = normalize_xyz
        self.standardize_v = standardize_v
        # instance attribute: a mutable class attribute would be shared
        # across all Preprocessor instances
        self.preprocessor_params = {}
def preprocess(self) -> GridData:
"""preprocess data
Returns:
GridData: new GridData object with the preprocessed data
            and preprocessing parameters set
"""
# get data
data = self.griddata.data.copy().reset_index()[["ID", "X", "Y", "Z", "V"]]
        # first downsample
if self.downsampling_res is not None:
data = self._downsample_data(data)
# normalize
if self.normalize_xyz:
data = self._normalize_xyz(data)
# standardize
if self.standardize_v:
data = self._standardize_v(data)
# return new object with preprocessed data and parameters
return GridData(data, preprocessor_params=self.preprocessor_params)
def _normalize_xyz(self, data: pd.DataFrame) -> pd.DataFrame:
"""apply normalization to X Y Z
Args:
data (pd.DataFrame): data to normalize
Returns:
pd.DataFrame: normalized data
"""
df = data.copy()
self.preprocessor_params["normalization"] = {}
for axis in ["X", "Y", "Z"]:
df[axis], axis_params = _normalize(df[axis])
self.preprocessor_params["normalization"][axis] = axis_params
return df
def _standardize_v(self, data: pd.DataFrame) -> pd.DataFrame:
"""apply standardization to V"""
df = data.copy()
df["V"], params = _standardize(df["V"])
self.preprocessor_params["standardization"] = params
return df
def _downsample_data(self, data: pd.DataFrame) -> pd.DataFrame:
"""downsample data making the average by blocks of given resolution
TODO: add option to downsample by selecting the statistic
taking the median, or the max
Args:
data (pd.DataFrame): data to downsample
Returns:
pd.DataFrame: downsampled data
"""
self.preprocessor_params["downsampling"] = {"resolution": self.downsampling_res}
idfs = []
# loop over unique ids
for id in self.griddata.data.index.get_level_values("ID").unique():
# filter by id
idf = self.griddata.data.loc[
self.griddata.data.index.get_level_values("ID") == id
].reset_index()
# save x,y values
x = idf["X"][0]
y = idf["Y"][0]
# extract z
idf = idf[["Z", "V"]]
            # downsample by grouping Z in blocks of the given resolution
            # and taking the mean of V per block
            res = self.preprocessor_params["downsampling"]["resolution"]
            idf = idf.groupby(idf["Z"].apply(lambda z: res * round(z / res)))[
                ["V"]
            ].mean()
# new downsampled df
idf["X"] = x
idf["Y"] = y
idf["ID"] = id
idf.reset_index(inplace=True) # reset index resulting from groupby
idfs.append(idf)
# return downsampled grid data
return pd.concat(idfs)
def _standardize(series: pd.Series) -> tuple:
"""standardize series to have mean 0 and std 1
Args:
series (pd.Series): series to standardize
Returns:
tuple: standardized series and standardization parameters
"""
series = series.copy()
# save standardization parameters
params = {
"mean": series.mean(),
"std": series.std(),
}
# standardize
series = (series - params["mean"]) / (params["std"])
return series, params
def _normalize(series: pd.Series) -> tuple:
"""normalize series between 0 and 1
Args:
series (pd.Series): series to normalize
Returns:
tuple: normalized series and normalization parameters
"""
series = series.copy()
# save normalization parameters
params = {
"min": series.min(),
"max": series.max(),
}
# normalize
series = (series - params["min"]) / (params["max"] - params["min"])
return series, params
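A quick worked example of the two helpers above (values are illustrative):

import pandas as pd

s = pd.Series([2.0, 4.0, 6.0])

norm, nparams = _normalize(s)
# norm -> [0.0, 0.5, 1.0]; nparams -> {"min": 2.0, "max": 6.0}

std, sparams = _standardize(s)
# pandas .std() is the sample std (ddof=1), so the std of [2, 4, 6] is 2.0
# std -> [-1.0, 0.0, 1.0]; sparams -> {"mean": 4.0, "std": 2.0}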
def reverse_preprocessing(griddata: GridData) -> GridData:
"""reverse preprocessing of whole GridData object
    Reverses all reversible transformations that have been
    applied to the GridData object, covering the following
    preprocessing operations:
- Normalization of X Y Z
- Standardization of V
Note:
It cannot reverse downsampling!
    Args:
        griddata (GridData): GridData object with preprocessor_params set
    Returns:
        GridData: GridData object with the preprocessing reversed
    Raises:
        ValueError: if no preprocessing has been applied to the data
"""
if griddata.preprocessor_params is None:
raise ValueError("No preprocessing has been applied to the data")
else:
# get data
data = griddata.data.copy().reset_index()
if "normalization" in griddata.preprocessor_params:
# reverse normalization of X Y Z
# original_value = normalized_value * (max_value - min_value) + min_value
for axis in ["X", "Y", "Z"]:
data[axis] = (
data[axis]
* (
griddata.preprocessor_params["normalization"][axis]["max"]
- griddata.preprocessor_params["normalization"][axis]["min"]
)
+ griddata.preprocessor_params["normalization"][axis]["min"]
)
if "standardization" in griddata.preprocessor_params:
# reverse standardization of V
data["V"] = (
data["V"] * griddata.preprocessor_params["standardization"]["std"]
+ griddata.preprocessor_params["standardization"]["mean"]
)
# return
return GridData(data)
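A hedged round-trip sketch tying Preprocessor and reverse_preprocessing together, where `df` stands for any DataFrame with the ID, X, Y, Z, V columns the module expects:

gd = GridData(df)
preprocessor = Preprocessor(gd, normalize_xyz=True, standardize_v=True)
gd_pre = preprocessor.preprocess()       # X/Y/Z scaled to [0, 1], V with mean 0 and std 1
gd_back = reverse_preprocessing(gd_pre)  # original X/Y/Z and V recovered
# note: downsampling, if applied, is not reversible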