
py3dinterpolations's Introduction

Hi there 👋


Contact: Medium

py3dinterpolations's People

Contributors

giocaizzi


py3dinterpolations's Issues

test plotting: assert each axes has 2 lines

https://api.github.com/giocaizzi/py3dinterpolations/blob/4544c573a8e987c599122c5a0906450b7e2bfa18/tests/plotting/test_plotting.py#L43

    ).preprocess()

    # patch the reverse_preprocessing function
    # returning a GridData object
    # created with a sample of the test_data
    with patch(
        "py3dinterpolations.plotting.plotting.reverse_preprocessing",
        return_value=GridData(test_data.sample(n=50, random_state=42)),
    ):
        fig = plot_downsampling(gd, preprocessed_gd)

    # assert is figure
    assert isinstance(fig, Figure)

    # assert figure has n visible axes
    n = len(gd.data.index.get_level_values("ID").unique())
    visible_axes = [ax for ax in fig.axes if ax.get_visible()]
    assert len(visible_axes) == n

    # # TODO assert each axes has 2 lines
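
A minimal sketch of the missing assertion, assuming each visible axes is expected to draw one line for the original data and one for the downsampled data; matplotlib's `Axes.get_lines()` returns the `Line2D` objects on an axes:

    # assert each visible axes has exactly 2 lines
    # (assumption: one line for the original data, one for the downsampled data)
    for ax in visible_axes:
        assert len(ax.get_lines()) == 2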

krige wrapper, method key param

The estimator returns a `method` key that is not needed by the kriging wrapper.

Find a flexible way to handle this; a good option is probably to use the scikit-learn wrapper `Krige()` directly.
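
A hedged sketch of the idea, using PyKrige's scikit-learn compatible `Krige` wrapper directly with `GridSearchCV`, so the `method` key is consumed by the wrapper itself and never has to be popped from the best parameters (the parameter grid values and the way `X`/`y` are built from `GridData` are illustrative assumptions):

from pykrige.rk import Krige
from sklearn.model_selection import GridSearchCV

# training arrays built from the GridData columns
X = griddata.data.reset_index()[["X", "Y", "Z"]].to_numpy()
y = griddata.data.reset_index()["V"].to_numpy()

# "method" is a regular Krige parameter here, so it stays inside the search
param_grid = {
    "method": ["ordinary3d"],
    "variogram_model": ["linear", "power", "gaussian"],
    "nlags": [4, 6, 8],
    "weight": [True, False],
}

search = GridSearchCV(Krige(), param_grid, cv=3)
search.fit(X, y)
best_model = search.best_estimator_  # already configured, nothing to strip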

https://api.github.com/giocaizzi/py3dinterpolations/blob/1edd1fcdb4b5241757cdc617d0adf5732a5f9a2e/py3dinterpolations/modelling/interpolate.py#L105

"""interpolator"""

from typing import Union, Tuple
import numpy as np

from ..core.griddata import GridData
from ..core import create_regulargrid3d_from_griddata
from .modeler import Modeler
from .preprocessor import Preprocessor
from .estimator import Estimator


def interpolate(
    griddata: GridData,
    model_name: str,
    grid_resolution: float,
    model_params: dict = {},
    model_params_grid: dict = {},
    preprocess_kwags: dict = {},
    predict_kwags: dict = {},
    return_model: bool = False,
) -> Union[np.ndarray, Tuple[np.ndarray, Modeler]]:
    """interpolate griddata

    Interpolate griddata using a Modeler instance that wraps all supported
    models. The model is selected with the argument `model_name`.

    If `model_params` is passed, the model is initialized with those
    parameters. Otherwise, to search for the best parameters, pass the
    `model_params_grid` argument.

    The 3D grid for interpolation is retrieved from the training data.
    At the moment only a regular grid is supported.

    If requested, the griddata is preprocessed using the Preprocessor class.

    Args:
        griddata (GridData): griddata to interpolate
        model_name (str): model name
        grid_resolution (float): grid resolution
        model_params (dict, optional): model parameters. Defaults to {}.
        model_params_grid (dict, optional): grid of model parameters to search
            for the best parameters. Defaults to {}.
        preprocess_kwags (dict, optional): preprocessing parameters. Defaults to {}.
        predict_kwags (dict, optional): prediction parameters. Defaults to {}.
        return_model (bool, optional): return model. Defaults to False.

    Returns:
        Union[np.ndarray, Tuple[np.ndarray, Modeler]]:
            interpolated griddata, optionally with model

    Raises:
        ValueError: either model_params or model_params_grid must be passed
        ValueError: model_params and model_params_grid cannot be passed together
        NotImplementedError: only RegularGrid3D is supported.
        NotImplementedError: Parameter search is only supported for ordinary_kriging

    Examples:
        >>> # interpolate griddata
        >>> interpolated = interpolate(
        >>>     griddata,
        >>>     model_name="ordinary_kriging",
        >>>     grid_resolution=5,
        >>>     model_params={
        >>>         "variogram_model": "linear",
        >>>         "nlags": 6,
        >>>         "weight": True,
        >>>     },
        >>>     preprocess_kwags={
        >>>         "downsampling_res": 0.1,
        >>>         "normalize_xyz": True,
        >>>         "standardize_v": True,
        >>>     },
        >>> )

    """
    # check model_params and model_params_grid
    if model_params == {} and model_params_grid == {}:
        raise ValueError("either model_params or model_params_grid must be passed")
    if model_params != {} and model_params_grid != {}:
        raise ValueError("model_params and model_params_grid cannot be passed together")

    # check grid_resolution is of supported type
    if isinstance(grid_resolution, (float, int)):
        # retrieve associated grid
        grid3d = create_regulargrid3d_from_griddata(griddata, grid_resolution)
    else:
        raise NotImplementedError("only RegularGrid3D is supported.")

    # preprocess griddata if needed
    if preprocess_kwags != {}:
        # preprocessor
        preprocessor = Preprocessor(griddata, **preprocess_kwags)
        # get new griddata
        griddata = preprocessor.preprocess()

    if model_params == {}:
        # implemented only for ordinary_kriging
        if model_name != "ordinary_kriging":
            raise NotImplementedError(
                "Parameter search is only supported for ordinary_kriging"
            )

        # estimate
        est = Estimator(griddata, model_params_grid)

        # TODO: krige wrapper, method key param
        #   that is not needed for the estimator
        #   find a flexible way to handle this, probably a good way is to use directly
        #   the sci-kit wrapper Krige()
        model_params = est.best_params
        model_params.pop("method")

    # init Modeler
    model = Modeler(
        griddata=griddata,
        grid3d=grid3d,
        model_name=model_name,
        model_params=model_params,
    )

    # make predictions
    predictions = model.predict(**predict_kwags)

    # return model
    if return_model:
        return predictions, model
    else:
        return predictions
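
The docstring example covers explicit `model_params`; below is a hedged usage sketch of the parameter-search path with an illustrative grid (the exact keys accepted by `Estimator` are an assumption based on the ordinary-kriging parameters above):

interpolated, model = interpolate(
    griddata,
    model_name="ordinary_kriging",
    grid_resolution=5,
    model_params_grid={
        "variogram_model": ["linear", "power"],
        "nlags": [4, 6],
        "weight": [True, False],
    },
    preprocess_kwags={"normalize_xyz": True, "standardize_v": True},
    return_model=True,
)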

pickled model

when pickling the model, the mesh attribute of grid3d is empty, although it was calculated initially
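
A minimal reproduction sketch of the report, assuming the model comes from `interpolate(..., return_model=True)` and that `grid3d.mesh` is the attribute found empty after unpickling:

import pickle

# model obtained from interpolate(..., return_model=True)
restored = pickle.loads(pickle.dumps(model))

print(model.grid3d.mesh)     # populated, computed at construction time
print(restored.grid3d.mesh)  # reported empty after the pickle round trip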

IDW: remove loops, use a vectorized approach

(a vectorized sketch follows the listing below)

Args:
    x (np.ndarray): x coordinates of the data points
    y (np.ndarray): y coordinates of the data points
    z (np.ndarray): z coordinates of the data points
    values (np.ndarray): values at the data points
    power (float): power of the IDW model

Attributes:
    power (float): power of the IDW model
    distance_matrix (np.ndarray): distance matrix between
        observations and data points

https://api.github.com/giocaizzi/py3dinterpolations/blob/4544c573a8e987c599122c5a0906450b7e2bfa18/py3dinterpolations/modelling/models/idw.py#L18

"""Inverse Distance Weighting (IDW) model"""

import numpy as np
import math

from .deterministic import DeterministicModel


class IDW(DeterministicModel):
    """Simple IDW Model object

    IDW: Inverse Distance Weighting

    Offers tweaking of the power parameter of the IDW model.

    Could be really slow for large datasets, due to the use of loops.

    TODO: remove loops, use a vectorized approach

    Args:
        x (np.ndarray): x coordinates of the data points
        y (np.ndarray): y coordinates of the data points
        z (np.ndarray): z coordinates of the data points
        values (np.ndarray): values at the data points
        power (float): power of the IDW model

    Attributes:
        power (float): power of the IDW model
        distance_matrix (np.ndarray): distance matrix between
            observations and data points
    """

    def __init__(
        self,
        x: np.ndarray,
        y: np.ndarray,
        z: np.ndarray,
        values: np.ndarray,
        power: float = 1,
    ):
        # initialize parent class
        super().__init__(x=x, y=y, z=z, values=values)

        # set power
        self.power = power

    def _compute_point(
        self,
        x: float,
        y: float,
        z: float,
        power: float,
        threshold: float = 0.0000000001,
    ) -> float:
        """find value at a point

        Loops through all the data points to estimate the value at a point
        based on the Inverse Distance Weighting (IDW) method.
        Might be really slow for large datasets.

        This implementation, however, offers easy advanced parametrization with:
        - n_points: number of points to use for the IDW estimation (TODO)
        - max_distance: maximum distance to use for the IDW estimation (TODO)
        """
        nominator = 0
        denominator = 0

        # distance in 3d
        for i in range(0, len(self.values)):
            dist = math.sqrt(
                (x - self.x[i]) * (x - self.x[i])
                + (y - self.y[i]) * (y - self.y[i])
                + (z - self.z[i]) * (z - self.z[i])
            )

            # If the point is really close to one of the data points,
            # return the data point value to avoid singularities
            # EXACT interpolations
            if dist < threshold:
                return self.values[i]

            nominator = nominator + (self.values[i] / pow(dist, power))
            denominator = denominator + (1 / pow(dist, power))

        # Return NaN if the denominator is zero
        if denominator > 0:
            value = nominator / denominator
        else:
            value = np.nan
        return value

    def compute(
        self, gridx: np.ndarray, gridy: np.ndarray, gridz: np.ndarray
    ) -> np.ndarray:
        meshx, meshy, meshz = np.meshgrid(gridx, gridy, gridz, indexing="xy")

        # Create a new mesh to store the coordinates
        new_mesh = np.zeros_like(meshx)

        # Iterate over the indices of the arrays
        for i in range(meshx.shape[0]):
            for j in range(meshx.shape[1]):
                for k in range(meshx.shape[2]):
                    # Store the coordinates in the new mesh
                    new_mesh[i, j, k] = self._compute_point(
                        x=meshx[i, j, k],
                        y=meshy[i, j, k],
                        z=meshz[i, j, k],
                        power=self.power,
                    )
        return new_mesh
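
A hedged sketch of the vectorized approach the TODO asks for: the per-point loops are replaced by a single (n_query, n_data) distance matrix built with NumPy broadcasting (the function name and signature are illustrative, not part of the current API):

import numpy as np

def idw_vectorized(x, y, z, values, qx, qy, qz, power=1.0, threshold=1e-10):
    # (n_query, n_data) distance matrix via broadcasting
    dist = np.sqrt(
        (qx[:, None] - x[None, :]) ** 2
        + (qy[:, None] - y[None, :]) ** 2
        + (qz[:, None] - z[None, :]) ** 2
    )
    # inverse-distance weights; clip tiny distances to avoid division by zero
    weights = 1.0 / np.power(np.maximum(dist, threshold), power)
    estimate = (weights * values[None, :]).sum(axis=1) / weights.sum(axis=1)
    # exact interpolation: a query point sitting on a data point takes its value
    exact_q, exact_d = np.nonzero(dist < threshold)
    estimate[exact_q] = values[exact_d]
    return estimate

Inside `compute`, the three meshes could then be flattened, interpolated in one call, and reshaped back, e.g. `idw_vectorized(self.x, self.y, self.z, self.values, meshx.ravel(), meshy.ravel(), meshz.ravel(), power=self.power).reshape(meshx.shape)`.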

interpolate function

  • technically, returning "interpolated" is not required as it is stored in model.results

Restructured interpolate

Implement IDW parametrization

This implementation, however, offers easy advanced parametrization with (see the sketch after the listing below):

  • n_points: number of points to use for the IDW estimation

  • max_distance: maximum distance to use for the IDW estimation

https://api.github.com/giocaizzi/py3dinterpolations/blob/fe1ba357af32f67cf72cd6a5cfa0c24b14acc115/py3dinterpolations/modelling/models/idw.py#L61

"""Inverse Distance Weighting (IDW) model"""

import numpy as np
import math

from .deterministic import DeterministicModel


class IDW(DeterministicModel):
    """Simple IDW Model object

    IDW: Inverse Distance Weighting

    Offers tweaking of the power parameter of the IDW model.

    Could be really slow for large datasets, due to the use of loops.

    TODO: remove loops, use a vectorized approach

    Args:
        x (np.ndarray): x coordinates of the data points
        y (np.ndarray): y coordinates of the data points
        z (np.ndarray): z coordinates of the data points
        values (np.ndarray): values at the data points
        power (float): power of the IDW model

    Attributes:
        power (float): power of the IDW model
        distance_matrix (np.ndarray): distance matrix between
            observations and data points
    """

    def __init__(
        self,
        x: np.ndarray,
        y: np.ndarray,
        z: np.ndarray,
        values: np.ndarray,
        power: float = 1,
    ):
        # initialize parent class
        super().__init__(x=x, y=y, z=z, values=values)

        # set power
        self.power = power

    def _compute_point(
        self,
        x: float,
        y: float,
        z: float,
        power: float,
        threshold: float = 0.0000000001,
    ) -> float:
        """find value at a point

        Loops through all the data points to estimate the value at a point
        based on the Inverse Distance Weighting (IDW) method.
        Might be really slow for large datasets.

        TODO: Implement parametrization
            This implementation, however, offers easy advanced parametrization with:
            - n_points: number of points to use for the IDW estimation
            - max_distance: maximum distance to use for the IDW estimation
        """
        nominator = 0
        denominator = 0

        # distance in 3d
        for i in range(0, len(self.values)):
            dist = math.sqrt(
                (x - self.x[i]) * (x - self.x[i])
                + (y - self.y[i]) * (y - self.y[i])
                + (z - self.z[i]) * (z - self.z[i])
            )

            # If the point is really close to one of the data points,
            # return the data point value to avoid singularities
            # EXACT interpolations
            if dist < threshold:
                return self.values[i]

            nominator = nominator + (self.values[i] / pow(dist, power))
            denominator = denominator + (1 / pow(dist, power))

        # Return NaN if the denominator is zero
        if denominator > 0:
            value = nominator / denominator
        else:
            value = np.nan
        return value

    def compute(
        self, gridx: np.ndarray, gridy: np.ndarray, gridz: np.ndarray
    ) -> np.ndarray:
        meshx, meshy, meshz = np.meshgrid(gridx, gridy, gridz, indexing="xy")

        # Create a new mesh to store the coordinates
        new_mesh = np.zeros_like(meshx)

        # Iterate over the indices of the arrays
        for i in range(meshx.shape[0]):
            for j in range(meshx.shape[1]):
                for k in range(meshx.shape[2]):
                    # Store the coordinates in the new mesh
                    new_mesh[i, j, k] = self._compute_point(
                        x=meshx[i, j, k],
                        y=meshy[i, j, k],
                        z=meshz[i, j, k],
                        power=self.power,
                    )
        return new_mesh
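
A hedged sketch of the n_points / max_distance parametrization described in the TODO, written on top of the same broadcast distance matrix as the vectorized sketch above (parameter names follow the TODO; their defaults and the NaN fallback are assumptions):

import numpy as np

def idw_estimate(x, y, z, values, qx, qy, qz, power=1.0,
                 n_points=None, max_distance=None, threshold=1e-10):
    # (n_query, n_data) distance matrix via broadcasting
    dist = np.sqrt(
        (qx[:, None] - x[None, :]) ** 2
        + (qy[:, None] - y[None, :]) ** 2
        + (qz[:, None] - z[None, :]) ** 2
    )
    weights = 1.0 / np.power(np.maximum(dist, threshold), power)

    # max_distance: ignore data points farther than the cutoff
    if max_distance is not None:
        weights[dist > max_distance] = 0.0

    # n_points: keep only the n closest data points for each query point
    if n_points is not None and n_points < dist.shape[1]:
        cutoff = np.sort(dist, axis=1)[:, n_points - 1][:, None]
        weights[dist > cutoff] = 0.0

    denominator = weights.sum(axis=1)
    with np.errstate(invalid="ignore", divide="ignore"):
        estimate = (weights * values[None, :]).sum(axis=1) / denominator
    estimate[denominator == 0] = np.nan  # no usable neighbours within the constraints
    return estimate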

add option to downsample by selecting the statistic

e.g. taking the median or the max instead of the mean (a sketch of this option follows the module listing below)

Args:
    data (pd.DataFrame): data to downsample

Returns:
    pd.DataFrame: downsampled data

https://api.github.com/giocaizzi/py3dinterpolations/blob/1edd1fcdb4b5241757cdc617d0adf5732a5f9a2e/py3dinterpolations/modelling/preprocessor.py#L141

"""preprocessor module"""

import pandas as pd

from ..core import GridData
from typing import Union


class Preprocessor:
    """Preprocessor class

    Preprocess data before interpolation. Preprocessing includes:
        - Downsampling, by taking the mean of blocks of given resolution
        - Normalization of X Y Z
        - Standardization of V

    By calling the preprocess method, the data is preprocessed and a new
    GridData object is returned with the preprocessed data and the
    preprocessing parameters set.

    Args:
        griddata (GridData): GridData object to preprocess
        downsampling_res (Union[float, None]): resolution to downsample data
            by taking the mean of blocks of given resolution. If None, no
            downsampling is applied. Default is None.
        normalize_xyz (bool): whether to normalize X Y Z. Default is True.
        standardize_v (bool): whether to standardize V. Default is True.

    Attributes:
        griddata (GridData): GridData object to preprocess
        downsampling_res (Union[float, None]): resolution to downsample data
            by taking the mean of blocks of given resolution. If None, no
            downsampling is applied. Default is None.
        normalize_xyz (bool): whether to normalize X Y Z. Default is True.
        standardize_v (bool): whether to standardize V. Default is True.
        preprocessor_params (dict): dictionary with the parameters of the
            preprocessing. It is set after calling the preprocess method.::

                preprocessor_params = {
                    "downsampling": {
                        "resolution": downsampling_res,
                    },
                    "normalization": {
                        "X": {
                            "min": min_value_of_X,
                            "max": max_value_of_X,
                        },
                        "Y": {
                            "min": min_value_of_Y,
                            "max": max_value_of_Y,
                        },
                        "Z": {
                            "min": min_value_of_Z,
                            "max": max_value_of_Z,
                        },
                    },
                    "standardization": {
                        "mean": mean_value_of_V,
                        "std": std_value_of_V,
                    },

                }

    Examples:
        >>> # preprocess data
        >>> preprocessor = Preprocessor(
        >>>     griddata,
        >>>     downsampling_res=0.1,
        >>>     normalize_xyz=True,
        >>>     standardize_v=True,
        >>> )
        >>> griddata = preprocessor.preprocess()
    """

    preprocessor_params = {}

    def __init__(
        self,
        griddata: GridData,
        downsampling_res: Union[float, None] = None,
        normalize_xyz: bool = True,
        standardize_v: bool = True,
    ):
        self.griddata = griddata
        self.downsampling_res = downsampling_res
        self.normalize_xyz = normalize_xyz
        self.standardize_v = standardize_v

    def preprocess(self) -> GridData:
        """preprocess data

        Returns:
            GridData: new GridData object with the preprocessed data
                and preprocessing parameters set

        """
        # get data
        data = self.griddata.data.copy().reset_index()[["ID", "X", "Y", "Z", "V"]]

        # first downsample
        if self.downsampling_res is not None:
            data = self._downsample_data(data)

        # normalize
        if self.normalize_xyz:
            data = self._normalize_xyz(data)

        # standardize
        if self.standardize_v:
            data = self._standardize_v(data)

        # return new object with preprocessed data and parameters
        return GridData(data, preprocessor_params=self.preprocessor_params)

    def _normalize_xyz(self, data: pd.DataFrame) -> pd.DataFrame:
        """apply normalization to X Y Z

        Args:
            data (pd.DataFrame): data to normalize

        Returns:
            pd.DataFrame: normalized data
        """
        df = data.copy()
        self.preprocessor_params["normalization"] = {}
        for axis in ["X", "Y", "Z"]:
            df[axis], axis_params = _normalize(df[axis])
            self.preprocessor_params["normalization"][axis] = axis_params
        return df

    def _standardize_v(self, data: pd.DataFrame) -> pd.DataFrame:
        """apply standardization to V"""
        df = data.copy()
        df["V"], params = _standardize(df["V"])
        self.preprocessor_params["standardization"] = params
        return df

    def _downsample_data(self, data: pd.DataFrame) -> pd.DataFrame:
        """downsample data making the average by blocks of given resolution

        TODO: add option to downsample by selecting the statistic
            taking the median, or the max

        Args:
            data (pd.DataFrame): data to downsample

        Returns:
            pd.DataFrame: downsampled data
        """
        self.preprocessor_params["downsampling"] = {"resolution": self.downsampling_res}
        idfs = []
        # loop over unique ids
        for id in self.griddata.data.index.get_level_values("ID").unique():
            # filter by id
            idf = self.griddata.data.loc[
                self.griddata.data.index.get_level_values("ID") == id
            ].reset_index()
            # save x,y values
            x = idf["X"][0]
            y = idf["Y"][0]
            # extract z
            idf = idf[["Z", "V"]]
            # downsample by grouping in blocks of given resolution
            # and taking the mean
            idf = idf.groupby(
                idf["Z"].apply(
                    lambda x: self.preprocessor_params["downsampling"]["resolution"]
                    * round(x / self.preprocessor_params["downsampling"]["resolution"])
                )
            )[["V"]].mean()
            # new downsampled df
            idf["X"] = x
            idf["Y"] = y
            idf["ID"] = id
            idf.reset_index(inplace=True)  # reset index resulting from groupby
            idfs.append(idf)
        # return downsampled grid data

        return pd.concat(idfs)


def _standardize(series: pd.Series) -> tuple:
    """standardize series to have mean 0 and std 1

    Args:
        series (pd.Series): series to standardize

    Returns:
        tuple: standardized series and standardization parameters
    """
    series = series.copy()
    # save standardization parameters
    params = {
        "mean": series.mean(),
        "std": series.std(),
    }
    # standardize
    series = (series - params["mean"]) / (params["std"])
    return series, params


def _normalize(series: pd.Series) -> tuple:
    """normalize series between 0 and 1

    Args:
        series (pd.Series): series to normalize

    Returns:
        tuple: normalized series and normalization parameters
    """
    series = series.copy()
    # save normalization parameters
    params = {
        "min": series.min(),
        "max": series.max(),
    }
    # normalize
    series = (series - params["min"]) / (params["max"] - params["min"])
    return series, params


def reverse_preprocessing(griddata: GridData) -> GridData:
    """reverse preprocessing of whole GridData object

    reverse all reversible transformations that have been
    applied to the GridData object.

    This method reverses the operations of preprocessing of:
        - Normalization of X Y Z
        - Standardization of V

    Note:
        It cannot reverse downsampling!

    Returns:
        GridData: GridData object with the data with reversed preprocessing
    """
    if griddata.preprocessor_params is None:
        raise ValueError("No preprocessing has been applied to the data")
    else:
        # get data
        data = griddata.data.copy().reset_index()
        if "normalization" in griddata.preprocessor_params:
            # reverse normalization of X Y Z
            # original_value = normalized_value * (max_value - min_value) + min_value
            for axis in ["X", "Y", "Z"]:
                data[axis] = (
                    data[axis]
                    * (
                        griddata.preprocessor_params["normalization"][axis]["max"]
                        - griddata.preprocessor_params["normalization"][axis]["min"]
                    )
                    + griddata.preprocessor_params["normalization"][axis]["min"]
                )
        if "standardization" in griddata.preprocessor_params:
            # reverse standardization of V
            data["V"] = (
                data["V"] * griddata.preprocessor_params["standardization"]["std"]
                + griddata.preprocessor_params["standardization"]["mean"]
            )
            # return
        return GridData(data)
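
A hedged sketch of the requested option: the aggregation statistic becomes a parameter and is passed straight to the pandas groupby aggregation, mirroring the block grouping already used in `_downsample_data` (the helper name and the `stat` argument are illustrative; only the mean behaviour exists today):

import pandas as pd

def _downsample_block(idf: pd.DataFrame, resolution: float, stat: str = "mean") -> pd.DataFrame:
    # group Z into blocks of the given resolution and aggregate V with the chosen statistic
    # stat may be any pandas aggregation name, e.g. "mean", "median", "max"
    blocks = idf["Z"].apply(lambda z: resolution * round(z / resolution))
    return idf.groupby(blocks)[["V"]].agg(stat).reset_index()

# illustrative usage on a single-ID slice of the data
idf = pd.DataFrame({"Z": [0.1, 0.2, 0.6, 0.7], "V": [1.0, 3.0, 2.0, 4.0]})
print(_downsample_block(idf, resolution=0.5, stat="median"))

Inside `Preprocessor`, the chosen statistic would also be recorded in `preprocessor_params["downsampling"]` next to the resolution.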
