Source code for pymarine.utils.numerical

"""
Some numerical utilities used in other modules
"""

import re

import numpy as np
import scipy.io as spio


[docs] def ecdf2percentile(ecdf, percentile): """Calculate a percentile of an Empirical CDF function as returned by the `statsmodels` package Parameters ---------- ecdf : :obj:`statsmodel.distributions.empirical_distribution.ECDF` Empirical CDF as a step function. percentile : float Percentile of the distribution Returns ------- float: value where a fraction of `percentile` will be lower Examples -------- >>> import statsmodels.api as sm >>> np.random.seed(0) >>> number_of_observations = 100 >>> # generate random data variing in between 0 and 100 >>> x_data = 100 * np.random.rand(number_of_observations) >>> # calculate the cumulative distribution function of this random data >>> e_cdf = sm.distributions.empirical_distribution.ECDF(x_data) >>> # print some P values belonging to the percentiles >>> print("percentile {:2.1f} : ecdf ={:6.2f}".format(0.1, ... ecdf2percentile(ecdf=e_cdf, percentile=0.1))) percentile 0.1 : ecdf = 9.61 >>> print("percentile {:2.1f} : ecdf ={:6.2f}".format(0.5, ... ecdf2percentile(ecdf=e_cdf, percentile=0.5))) percentile 0.5 : ecdf = 46.87 >>> print("percentile {:2.1f} : ecdf ={:6.2f}".format(0.9, ... ecdf2percentile(ecdf=e_cdf, percentile=0.9))) percentile 0.9 : ecdf = 89.18 """ for i in range(ecdf.n): if ecdf.y[i] > percentile: return ecdf.x[i]
[docs] def get_parameter_list_key(parlist): """Utility for the qtgraphs Parameter Item Parameters ---------- parlist : The parameter tree list (is an ordered list) contain all the values and current one Returns ------- str The name belonging to the current value Notes ----- The parameter tree widget has a field 'list' in which a list of values is given with corresponding integers. The current integer belonging to the parlist is obtained by parlist.value() however, to get the associated value of the key field is less straightforward In this routine it is retrieved """ # the current value of the parlist value = parlist.value() # get the reverse list from the parlist reverselist = parlist.reverse # get the index belonging to the current value index = reverselist[0].index(value) # get the name of the key of this index keyname = reverselist[1][index] return keyname
[docs] def get_column_with_max_cumulative_value(data, regular_expression=".*"): """Find the column of a pandas DataFrame with the maximum cumulative value Parameters ---------- data : DataFrame Data frame with the columns regular_expression : str Regular expression used to make a selection of columns to include. Default to '.*', which means that all columns are included Returns ------- str or None: The name of the column with the maximum cumulative value or None if no columns were found Notes ----- * Only the columns with a name obeying the regular expression are taken into account * An example of usage can be found in the fatigue monitoring software where we have data frames with damage over all the channels at a hot spots. If you want to obtained the channel with the maximum cumulative damage you can use this function Examples -------- >>> import string >>> import pandas as pd >>> np.random.seed(0) >>> n_cols = 5 >>> n_rows = 10 Create a 10 x 5 data frame with random values with columns named as A, B, C, etc >>> data_frame = pd.DataFrame(np.random.random_sample((n_rows, n_cols)), ... columns=list(string.ascii_uppercase)[:n_cols]) Obtain the name of the column with the maximum cumulative value >>> get_column_with_max_cumulative_value(data_frame) 'D' Obtain the name of the column with the maximum cumulative value only including colums A, B and C >>> get_column_with_max_cumulative_value(data_frame, regular_expression="[ABC]") 'C' """ columns = list() for col in data.columns.values: if re.match(regular_expression, col): columns.append(col) if columns: name_of_maximum_column = data.fillna(0).sum()[columns[0] : columns[-1]].idxmax() else: name_of_maximum_column = None return name_of_maximum_column
[docs] def nans(shape, dtype=float): """Create an array filled with :class:`numpy.nan` values Parameters ---------- shape : tuple Shape of the array of nans to create dtype : `numpy.dtype`, optional, default=float Type of the nan Returns ------- ndarray Array filled with `numpy.nan` values Examples -------- >>> nans((2, 3)) array([[ nan, nan, nan], [ nan, nan, nan]]) """ # create an array of nans with give shape arr = np.empty(shape, dtype) arr.fill(np.nan) return arr
[docs] def get_nearest_index(data, value): """ Find the index of the first occurrence of a value in a array with monotonically increasing values Parameters ---------- data : array_like Array with monotonically increasing numbers value : Value of the point to give the index Returns ------- int Index of floor cell where the values in the array first exceed the value Example ------- >>> data_array = np.linspace(0, 10, 8, endpoint=False) >>> print(data_array) [ 0. 1.25 2.5 3.75 5. 6.25 7.5 8.75] >>> get_nearest_index(data_array, value=3) 2 >>> get_nearest_index(data_array, value=3.5) 2 >>> get_nearest_index(data_array, value=8.5) 6 >>> get_nearest_index(data_array, value=10) 7 >>> get_nearest_index(data_array, value=-1) 0 In case of a non-increasing array an AssertionError is raised >>> data_array = np.array([3, 0, 2, 4, 2.11, 1]) >>> try: ... get_nearest_index(data_array, value=2.1) ... except AssertionError as err: ... print("An assertion error was raised") An assertion error was raised Raises ------ AssertionError In case the array does not have monotonically increasing values Notes ----- * Only arrays with monotonically increasing values are allowed. * In case the nearest index of an arbitrary array is needed, `find_inx_nearest_val` should be used * In case the `value` a larger than any value in the `data` array, the maximum index is returned, while in case the `value` is smaller than any value in the `data` array, a zero is return See Also -------- find_idx_nearest_val: a function to get the index of the nearest value for an arbitraray array """ data_array = np.asanyarray(data) if (np.diff(data_array) < 0).any(): raise AssertionError( "Only monotonic increasing arrays are allowed. " "Use `find_inx_nearest_val` instead " ) sgn = np.sign(np.asarray(data) - value) try: nearest_index = np.where(np.diff(sgn) > 0)[0][0] except IndexError: if sgn[0] > 0: nearest_index = 0 else: nearest_index = data.size - 1 return nearest_index
[docs] def find_idx_nearest_val(array, value): """Find the nearest index of a value in a array. Parameters ---------- array : array_like an array with values value : float the value for which we want the nearest index Returns ------- int The index of the nearest value to 'value' in array. the number of items in the array Examples -------- >>> data_array = np.linspace(0, 10, 8, endpoint=False) >>> print(data_array) [ 0. 1.25 2.5 3.75 5. 6.25 7.5 8.75] >>> find_idx_nearest_val(data_array, value=3) 2 >>> find_idx_nearest_val(data_array, value=3.5) 3 >>> find_idx_nearest_val(data_array, value=8.5) 7 >>> find_idx_nearest_val(data_array, value=10) 7 >>> find_idx_nearest_val(data_array, value=-1) 0 In case of a non-increasing array with equal candidates, the last one is returned >>> data_array = np.array([3, 0, 2, 4, 2, 1]) >>> find_idx_nearest_val(data_array, value=2.1) 4 >>> data_array = np.array([3, 0, 2, 4, 2, 1, 2]) >>> find_idx_nearest_val(data_array, value=2.1) 6 If we change the first candidate, so it becomes the nearest, this one will be returned >>> data_array = np.array([3, 0, 2.09, 4, 2, 1]) >>> find_idx_nearest_val(data_array, value=2.1) 2 Notes ----- * In case that 2 or more items exist with the same distance from `value`, the *last* occurrence is returned * In case the value is outside the range of any value inside the array, either 0 or N-1 is return, with N the number of array elements of the input array See Also -------- get_nearest_index: This function returns the first occurrence and only works for monotonically increasing arrays """ idx_sorted = np.argsort(array) sorted_array = np.array(array[idx_sorted]) idx = np.searchsorted(sorted_array, [value], side="left") try: # in case search sorted returns a list of values take the first one idx = idx[0] except IndexError: # it was not a list, so do't do anything pass if idx >= len(array): idx_nearest = idx_sorted[len(array) - 1] elif idx == 0: idx_nearest = idx_sorted[0] else: if abs(value - sorted_array[idx - 1]) < abs(value - sorted_array[idx]): idx_nearest = idx_sorted[idx - 1] else: idx_nearest = idx_sorted[idx] return idx_nearest
[docs] def get_range_from_string(range_string): """Analyse a range string to get the start, end and step and return a numpy array Parameters ---------- range_string: str, start:end:[step] String representing the start, end and step size of a range Returns ------- ndarray: 1-D Array with points defined by range_string Examples -------- >>> get_range_from_string("0:10:2") array([ 0., 2., 4., 6., 8., 10.]) >>> get_range_from_string("0:7") array([ 0., 1., 2., 3., 4., 5., 6., 7.]) >>> get_range_from_string("3:4:0.2") array([ 3. , 3.2, 3.4, 3.6, 3.8, 4. ]) """ values = range_string.split(":") try: start = values[0] stop = values[1] try: spacing = values[2] except IndexError: spacing = 1.0 except IndexError: raise AssertionError("Usage supply string 'start:end:[step]'") try: start = float(start) stop = float(stop) spacing = float(spacing) except ValueError: raise AssertionError( "Could not convert one of the value to a float from string {}" "".format(range_string) ) n_points = int((stop - start) / spacing) + 1 return np.linspace(start, stop, n_points)
[docs] def make_2d_array_cyclic(data_2d, axis=0, add_constant=0.0): """Makes a 2D array period by copying the first row toward the end Parameters ---------- data_2d : ndarray NxM array carrying the data. It is assumed that the data along one of the axis is periodic and that we want to make the array cyclic by copying the first row or column to the end axis: int Axis to make periodic. Default = 0 add_constant : float Add this constant to the copy row or column to allow to add 2*pi or 360 Returns ------- ndarray Same 2d data array with one extra column (N+1 x M for axis == 0) or one extra row (N x M + 1 for axis == 1) Examples -------- First make some 2D data array using mesh grid containing the direction we want to make periodic >>> directions = np.linspace(0, 360, 6, endpoint=False) >>> frequencies = np.linspace(0, 3, 3) >>> dd, ff = np.meshgrid(directions, frequencies) >>> dd array([[ 0., 60., 120., 180., 240., 300.], [ 0., 60., 120., 180., 240., 300.], [ 0., 60., 120., 180., 240., 300.]]) The array `dd` is periodic along the axis = 0 direction. So copy the first column to the end >>> dd_periodic = make_2d_array_cyclic(dd) >>> dd_periodic array([[ 0., 60., 120., 180., 240., 300., 0.], [ 0., 60., 120., 180., 240., 300., 0.], [ 0., 60., 120., 180., 240., 300., 0.]]) We can do the same in case we have the transposed version, only we have to use the axis = 1 argument to pick the right axis >>> dd_tr = dd.T >>> dd_tr array([[ 0., 0., 0.], [ 60., 60., 60.], [ 120., 120., 120.], [ 180., 180., 180.], [ 240., 240., 240.], [ 300., 300., 300.]]) >>> dd_tr_periodic = make_2d_array_cyclic(dd_tr, axis=1) >>> dd_tr_periodic array([[ 0., 0., 0.], [ 60., 60., 60.], [ 120., 120., 120.], [ 180., 180., 180.], [ 240., 240., 240.], [ 300., 300., 300.], [ 0., 0., 0.]]) For polar plotting it is required that the angle is increasing, which means that the last 0 needs to be 360. This can be established by using the `add_constant` option >>> dd_periodic = make_2d_array_cyclic(dd, add_constant=360.) >>> dd_periodic array([[ 0., 60., 120., 180., 240., 300., 360.], [ 0., 60., 120., 180., 240., 300., 360.], [ 0., 60., 120., 180., 240., 300., 360.]]) Notes ----- This function can be used to ensure that a 2D data array with directions at one axis can be easily made periodic into the direction axis. This is a requirement to make a polar plot """ if axis == 0: # Create a Mx1 array of the first column of the 2D data array first_col = data_2d[:, 0].reshape(data_2d.shape[0], 1) + add_constant data_2d_periodic = np.hstack((data_2d, first_col)) elif axis == 1: # create a 1xN array of the first row of the 2D data array first_row = data_2d[0, :].reshape(1, data_2d.shape[1]) + add_constant data_2d_periodic = np.vstack((data_2d, first_row)) else: raise AssertionError(f"Argument `axis` can only be 0 or 1. {axis} given.") return data_2d_periodic
[docs] def extrap1d(interpolator): """ Extrapolate the interp1d function outside the boundaries .. deprecated:: 0.3.4 Deprecated function, the scipy *interp1d* now can extrapolate as well. This function is maintained for backward compatibility. Parameters ---------- interpolator: interp1d The interpolator object created with scipy Returns ------- function New interpolator object that extrapolates values outside the range Examples -------- Assumed you have a x and y array you want to interpolate. You can use the scipy interp1d function for that >>> from scipy.interpolate import interp1d >>> xp = np.linspace(0, 3, 4) >>> yp = xp**2 >>> print(np.vstack((xp, yp))) [[ 0. 1. 2. 3.] [ 0. 1. 4. 9.]] Use the data samples to create a Interpolator *f_inter* >>> f_inter = interp1d(xp, yp) Interpolation on a new mesh within the boundaries of the previous mesh can be done as >>> xp_new = np.linspace(0, 3, 6) >>> yp_new = f_inter(xp_new) >>> print(np.vstack((xp_new, yp_new))) [[ 0. 0.6 1.2 1.8 2.4 3. ] [ 0. 0.6 1.6 3.4 6. 9. ]] However, perhaps you want to extend the boundaries outside the initial boundaries. In that case you can use extrap1 in order to extrapolate the function outside the boundaries. First create the Extrapolator using the scipy Interpolator >>> f_extra = extrap1d(f_inter) Now we can also extrapolate outside the range >>> xp_new2 = np.linspace(0, 4, 9) >>> yp_new2 = f_extra(xp_new2) >>> print(np.vstack((xp_new2, yp_new2))) [[ 0. 0.5 1. 1.5 2. 2.5 3. 3.5 4. ] [ 0. 0.5 1. 2.5 4. 6.5 9. 11.5 14. ]] In the latest scipy version extrapolation is possible with the *interp1d* function as well : >>> f_extra2 = interp1d(xp, yp, fill_value="extrapolate") >>> yp_new3 = f_extra2(xp_new2) >>> print(np.vstack((yp_new2, yp_new3))) [[ 0. 0.5 1. 2.5 4. 6.5 9. 11.5 14. ] [ 0. 0.5 1. 2.5 4. 6.5 9. 11.5 14. ]] As you can see, the result is the same as the *extrap1d* function. The native scipy.interp1d with *extrapolate* as *fill_values* is recommended; the *extrap1d* function is only kept for backward compatibility and may be dropped soon We can plot the results and compare it with the original data line .. plot:: >>> import matplotlib.pyplot as plt >>> l = plt.plot(xp, yp, "o", label="samples") >>> l = plt.plot(xp_new2, yp_new2, "-x", label="extrap1d") >>> xp_new3 = np.linspace(0, 4, 50) >>> yp_new3 = xp_new3**2 >>> l = plt.plot(xp_new3, yp_new3, "-", label="original") >>> l = plt.legend() >>> plt.ion() >>> plt.show() Notes ----- * In case you want to use interpolate with values outside of the boundaries, this function allows to extrapolate outside the boundaries as described here interp_ References ---------- http://stackoverflow.com/questions/2745329/ how-to-make-scipy-interpolate-give-an-extrapolated-result-beyond-the-input-range https://stackoverflow.com/a/37172840/4515114 .. _interp: http://stackoverflow.com/questions/2745329/ how-to-make-scipy-interpolate-give-an-extrapolated-result-beyond-the-input-range """ xs = interpolator.x ys = interpolator.y def pointwise(x): if x < xs[0]: return ys[0] + (x - xs[0]) * (ys[1] - ys[0]) / (xs[1] - xs[0]) elif x > xs[-1]: return ys[-1] + (x - xs[-1]) * (ys[-1] - ys[-2]) / (xs[-1] - xs[-2]) else: return interpolator(x) def ufunclike(xs): return np.array(list(map(pointwise, np.array(xs)))) return ufunclike
[docs] def loadmat(filename): """ Load a matlab data file with a complex data structure Parameters ---------- filename: str Name of the matlab file to import Returns ------- dict: Dictionary with the complex matlab data structure Examples -------- To read a matlab file do >>> import os >>> file_name = os.path.join("..", "data", "RAO_7.mat") >>> data = loadmat(filename=file_name) Now have a look at the contents >>> print_mat_nested(data) Key: DirRange Key: FreqRange Key: RAO Key: __globals__ Key: __header__ Key: __version__ The data stored in the Dir Range field can be accessed as >>> data["DirRange"] array([ 0, 15, 30, 45, 60, 75, 90, 105, 120, 135, 150, 165, 180, 195, 210, 225, 240, 255, 270, 285, 300, 315, 330, 345], dtype=uint16) Notes ----- * This function should be called instead of direct spio.loadmat as it cures the problem of not properly recovering python dictionaries from mat files. It calls the function check keys to cure all entries which are still mat-objects References ---------- * http://pyhogs.github.io/reading-mat-files.html * http://stackoverflow.com/questions/7008608/ scipy-io-loadmat-nested-structures-i-e-dictionaries """ data = spio.loadmat(filename, struct_as_record=False, squeeze_me=True) return _check_keys(data)
def _check_keys(dict): """ checks if entries in dictionary are mat-objects. If yes todict is called to change them to nested dictionaries Parameters ---------- dict Returns ------- dict Dictionary with the complex data structure """ for key in dict: if isinstance(dict[key], spio.matlab.mio5_params.mat_struct): dict[key] = _todict(dict[key]) return dict def _todict(matobj): """ Convert a matlab object to a dictionary Parameters ---------- matobj: A matlab opject Returns ------- dict Dictionary with a matlab object A recursive function which constructs from matobjects nested dictionaries """ dict = {} for strg in matobj._fieldnames: elem = matobj.__dict__[strg] if isinstance(elem, spio.matlab.mio5_params.mat_struct): dict[strg] = _todict(elem) else: dict[strg] = elem return dict if __name__ == "__main": import doctest doctest.testmod()