"""
Some numerical utilities used in other modules
"""
import re
import numpy as np
import scipy.io as spio
[docs]
def ecdf2percentile(ecdf, percentile):
"""Calculate a percentile of an Empirical CDF function as returned by the
`statsmodels` package
Parameters
----------
ecdf : :obj:`statsmodel.distributions.empirical_distribution.ECDF`
Empirical CDF as a step function.
percentile : float
Percentile of the distribution
Returns
-------
float:
value where a fraction of `percentile` will be lower
Examples
--------
>>> import statsmodels.api as sm
>>> np.random.seed(0)
>>> number_of_observations = 100
>>> # generate random data variing in between 0 and 100
>>> x_data = 100 * np.random.rand(number_of_observations)
>>> # calculate the cumulative distribution function of this random data
>>> e_cdf = sm.distributions.empirical_distribution.ECDF(x_data)
>>> # print some P values belonging to the percentiles
>>> print("percentile {:2.1f} : ecdf ={:6.2f}".format(0.1,
... ecdf2percentile(ecdf=e_cdf, percentile=0.1)))
percentile 0.1 : ecdf = 9.61
>>> print("percentile {:2.1f} : ecdf ={:6.2f}".format(0.5,
... ecdf2percentile(ecdf=e_cdf, percentile=0.5)))
percentile 0.5 : ecdf = 46.87
>>> print("percentile {:2.1f} : ecdf ={:6.2f}".format(0.9,
... ecdf2percentile(ecdf=e_cdf, percentile=0.9)))
percentile 0.9 : ecdf = 89.18
"""
for i in range(ecdf.n):
if ecdf.y[i] > percentile:
return ecdf.x[i]
[docs]
def get_parameter_list_key(parlist):
"""Utility for the qtgraphs Parameter Item
Parameters
----------
parlist :
The parameter tree list (is an ordered list) contain all the values and current
one
Returns
-------
str
The name belonging to the current value
Notes
-----
The parameter tree widget has a field 'list' in which a list of values is given with
corresponding integers. The current integer belonging to the parlist is obtained by
parlist.value() however, to get the associated value of the key field is less
straightforward In this routine it is retrieved
"""
# the current value of the parlist
value = parlist.value()
# get the reverse list from the parlist
reverselist = parlist.reverse
# get the index belonging to the current value
index = reverselist[0].index(value)
# get the name of the key of this index
keyname = reverselist[1][index]
return keyname
[docs]
def get_column_with_max_cumulative_value(data, regular_expression=".*"):
"""Find the column of a pandas DataFrame with the maximum cumulative value
Parameters
----------
data : DataFrame
Data frame with the columns
regular_expression : str
Regular expression used to make a selection of columns to include. Default to
'.*', which means that all columns are included
Returns
-------
str or None:
The name of the column with the maximum cumulative value or None if no columns
were found
Notes
-----
* Only the columns with a name obeying the regular expression are taken into account
* An example of usage can be found in the fatigue monitoring software where we have
data frames with damage over all the channels at a hot spots. If you want to
obtained the channel with the maximum cumulative damage you can use this function
Examples
--------
>>> import string
>>> import pandas as pd
>>> np.random.seed(0)
>>> n_cols = 5
>>> n_rows = 10
Create a 10 x 5 data frame with random values with columns named as A, B, C, etc
>>> data_frame = pd.DataFrame(np.random.random_sample((n_rows, n_cols)),
... columns=list(string.ascii_uppercase)[:n_cols])
Obtain the name of the column with the maximum cumulative value
>>> get_column_with_max_cumulative_value(data_frame)
'D'
Obtain the name of the column with the maximum cumulative value only including
colums A, B and C
>>> get_column_with_max_cumulative_value(data_frame, regular_expression="[ABC]")
'C'
"""
columns = list()
for col in data.columns.values:
if re.match(regular_expression, col):
columns.append(col)
if columns:
name_of_maximum_column = data.fillna(0).sum()[columns[0] : columns[-1]].idxmax()
else:
name_of_maximum_column = None
return name_of_maximum_column
[docs]
def nans(shape, dtype=float):
"""Create an array filled with :class:`numpy.nan` values
Parameters
----------
shape : tuple
Shape of the array of nans to create
dtype : `numpy.dtype`, optional, default=float
Type of the nan
Returns
-------
ndarray
Array filled with `numpy.nan` values
Examples
--------
>>> nans((2, 3))
array([[ nan, nan, nan],
[ nan, nan, nan]])
"""
# create an array of nans with give shape
arr = np.empty(shape, dtype)
arr.fill(np.nan)
return arr
[docs]
def get_nearest_index(data, value):
"""
Find the index of the first occurrence of a value in a array with monotonically
increasing values
Parameters
----------
data : array_like
Array with monotonically increasing numbers
value :
Value of the point to give the index
Returns
-------
int
Index of floor cell where the values in the array first exceed the value
Example
-------
>>> data_array = np.linspace(0, 10, 8, endpoint=False)
>>> print(data_array)
[ 0. 1.25 2.5 3.75 5. 6.25 7.5 8.75]
>>> get_nearest_index(data_array, value=3)
2
>>> get_nearest_index(data_array, value=3.5)
2
>>> get_nearest_index(data_array, value=8.5)
6
>>> get_nearest_index(data_array, value=10)
7
>>> get_nearest_index(data_array, value=-1)
0
In case of a non-increasing array an AssertionError is raised
>>> data_array = np.array([3, 0, 2, 4, 2.11, 1])
>>> try:
... get_nearest_index(data_array, value=2.1)
... except AssertionError as err:
... print("An assertion error was raised")
An assertion error was raised
Raises
------
AssertionError
In case the array does not have monotonically increasing values
Notes
-----
* Only arrays with monotonically increasing values are allowed.
* In case the nearest index of an arbitrary array is needed, `find_inx_nearest_val`
should be used
* In case the `value` a larger than any value in the `data` array, the maximum index
is returned, while in case the `value` is smaller than any value in the `data`
array, a zero is return
See Also
--------
find_idx_nearest_val: a function to get the index of the nearest value for an
arbitraray array
"""
data_array = np.asanyarray(data)
if (np.diff(data_array) < 0).any():
raise AssertionError(
"Only monotonic increasing arrays are allowed. "
"Use `find_inx_nearest_val` instead "
)
sgn = np.sign(np.asarray(data) - value)
try:
nearest_index = np.where(np.diff(sgn) > 0)[0][0]
except IndexError:
if sgn[0] > 0:
nearest_index = 0
else:
nearest_index = data.size - 1
return nearest_index
[docs]
def find_idx_nearest_val(array, value):
"""Find the nearest index of a value in a array.
Parameters
----------
array : array_like
an array with values
value : float
the value for which we want the nearest index
Returns
-------
int
The index of the nearest value to 'value' in array.
the number of items in the array
Examples
--------
>>> data_array = np.linspace(0, 10, 8, endpoint=False)
>>> print(data_array)
[ 0. 1.25 2.5 3.75 5. 6.25 7.5 8.75]
>>> find_idx_nearest_val(data_array, value=3)
2
>>> find_idx_nearest_val(data_array, value=3.5)
3
>>> find_idx_nearest_val(data_array, value=8.5)
7
>>> find_idx_nearest_val(data_array, value=10)
7
>>> find_idx_nearest_val(data_array, value=-1)
0
In case of a non-increasing array with equal candidates, the last one is returned
>>> data_array = np.array([3, 0, 2, 4, 2, 1])
>>> find_idx_nearest_val(data_array, value=2.1)
4
>>> data_array = np.array([3, 0, 2, 4, 2, 1, 2])
>>> find_idx_nearest_val(data_array, value=2.1)
6
If we change the first candidate, so it becomes the nearest, this one will be
returned
>>> data_array = np.array([3, 0, 2.09, 4, 2, 1])
>>> find_idx_nearest_val(data_array, value=2.1)
2
Notes
-----
* In case that 2 or more items exist with the same distance from `value`, the
*last* occurrence is returned
* In case the value is outside the range of any value inside the array, either 0 or
N-1 is return, with N the number of array elements of the input array
See Also
--------
get_nearest_index:
This function returns the first occurrence and only works for monotonically
increasing arrays
"""
idx_sorted = np.argsort(array)
sorted_array = np.array(array[idx_sorted])
idx = np.searchsorted(sorted_array, [value], side="left")
try:
# in case search sorted returns a list of values take the first one
idx = idx[0]
except IndexError:
# it was not a list, so do't do anything
pass
if idx >= len(array):
idx_nearest = idx_sorted[len(array) - 1]
elif idx == 0:
idx_nearest = idx_sorted[0]
else:
if abs(value - sorted_array[idx - 1]) < abs(value - sorted_array[idx]):
idx_nearest = idx_sorted[idx - 1]
else:
idx_nearest = idx_sorted[idx]
return idx_nearest
[docs]
def get_range_from_string(range_string):
"""Analyse a range string to get the start, end and step and return a numpy array
Parameters
----------
range_string: str, start:end:[step]
String representing the start, end and step size of a range
Returns
-------
ndarray:
1-D Array with points defined by range_string
Examples
--------
>>> get_range_from_string("0:10:2")
array([ 0., 2., 4., 6., 8., 10.])
>>> get_range_from_string("0:7")
array([ 0., 1., 2., 3., 4., 5., 6., 7.])
>>> get_range_from_string("3:4:0.2")
array([ 3. , 3.2, 3.4, 3.6, 3.8, 4. ])
"""
values = range_string.split(":")
try:
start = values[0]
stop = values[1]
try:
spacing = values[2]
except IndexError:
spacing = 1.0
except IndexError:
raise AssertionError("Usage supply string 'start:end:[step]'")
try:
start = float(start)
stop = float(stop)
spacing = float(spacing)
except ValueError:
raise AssertionError(
"Could not convert one of the value to a float from string {}"
"".format(range_string)
)
n_points = int((stop - start) / spacing) + 1
return np.linspace(start, stop, n_points)
[docs]
def make_2d_array_cyclic(data_2d, axis=0, add_constant=0.0):
"""Makes a 2D array period by copying the first row toward the end
Parameters
----------
data_2d : ndarray
NxM array carrying the data. It is assumed that the data along one of the axis
is periodic and that we want to make the array cyclic by copying the first row
or column to the end
axis: int
Axis to make periodic. Default = 0
add_constant : float
Add this constant to the copy row or column to allow to add 2*pi or 360
Returns
-------
ndarray
Same 2d data array with one extra column (N+1 x M for axis == 0) or one extra
row (N x M + 1 for axis == 1)
Examples
--------
First make some 2D data array using mesh grid containing the direction we want to
make periodic
>>> directions = np.linspace(0, 360, 6, endpoint=False)
>>> frequencies = np.linspace(0, 3, 3)
>>> dd, ff = np.meshgrid(directions, frequencies)
>>> dd
array([[ 0., 60., 120., 180., 240., 300.],
[ 0., 60., 120., 180., 240., 300.],
[ 0., 60., 120., 180., 240., 300.]])
The array `dd` is periodic along the axis = 0 direction. So copy the first column to
the end
>>> dd_periodic = make_2d_array_cyclic(dd)
>>> dd_periodic
array([[ 0., 60., 120., 180., 240., 300., 0.],
[ 0., 60., 120., 180., 240., 300., 0.],
[ 0., 60., 120., 180., 240., 300., 0.]])
We can do the same in case we have the transposed version, only we have to use the
axis = 1 argument to pick the right axis
>>> dd_tr = dd.T
>>> dd_tr
array([[ 0., 0., 0.],
[ 60., 60., 60.],
[ 120., 120., 120.],
[ 180., 180., 180.],
[ 240., 240., 240.],
[ 300., 300., 300.]])
>>> dd_tr_periodic = make_2d_array_cyclic(dd_tr, axis=1)
>>> dd_tr_periodic
array([[ 0., 0., 0.],
[ 60., 60., 60.],
[ 120., 120., 120.],
[ 180., 180., 180.],
[ 240., 240., 240.],
[ 300., 300., 300.],
[ 0., 0., 0.]])
For polar plotting it is required that the angle is increasing, which means that
the last 0 needs to be 360. This can be established by using the `add_constant`
option
>>> dd_periodic = make_2d_array_cyclic(dd, add_constant=360.)
>>> dd_periodic
array([[ 0., 60., 120., 180., 240., 300., 360.],
[ 0., 60., 120., 180., 240., 300., 360.],
[ 0., 60., 120., 180., 240., 300., 360.]])
Notes
-----
This function can be used to ensure that a 2D data array with directions at one axis
can be easily made periodic into the direction axis. This is a requirement to make a
polar plot
"""
if axis == 0:
# Create a Mx1 array of the first column of the 2D data array
first_col = data_2d[:, 0].reshape(data_2d.shape[0], 1) + add_constant
data_2d_periodic = np.hstack((data_2d, first_col))
elif axis == 1:
# create a 1xN array of the first row of the 2D data array
first_row = data_2d[0, :].reshape(1, data_2d.shape[1]) + add_constant
data_2d_periodic = np.vstack((data_2d, first_row))
else:
raise AssertionError(f"Argument `axis` can only be 0 or 1. {axis} given.")
return data_2d_periodic
[docs]
def print_mat_nested(d, indent=0, nkeys=0):
"""
Pretty print nested structures from .mat files
Parameters
----------
indent: int
Number of spaces to indent
nkeys: int
Number of keys to show
References
----------
* http://stackoverflow.com/questions/3229419/
pretty-printing-nested-dictionaries-in-python
"""
# Subset dictionary to limit keys to print. Only works on first level
if nkeys > 0:
d = {
k: d[k] for k in sorted(d.keys())[:nkeys]
} # Dictionary comprehension: limit to first nkeys keys.
if isinstance(d, dict):
for key in sorted(d.keys()): # loops through key, sort them
value = d[key]
print("{}{}{}".format("\t" * indent, "Key: ", str(key)))
print_mat_nested(value, indent + 1)
if (
isinstance(d, np.ndarray) and d.dtype.names is not None
): # Note: and short-circuits by default
for n in d.dtype.names: # This means it's a struct, it's bit of a kludge test.
print("{}{}{}".format("\t" * indent, "Field: ", str(key)))
print_mat_nested(d[n], indent + 1)
[docs]
def loadmat(filename):
"""
Load a matlab data file with a complex data structure
Parameters
----------
filename: str
Name of the matlab file to import
Returns
-------
dict:
Dictionary with the complex matlab data structure
Examples
--------
To read a matlab file do
>>> import os
>>> file_name = os.path.join("..", "data", "RAO_7.mat")
>>> data = loadmat(filename=file_name)
Now have a look at the contents
>>> print_mat_nested(data)
Key: DirRange
Key: FreqRange
Key: RAO
Key: __globals__
Key: __header__
Key: __version__
The data stored in the Dir Range field can be accessed as
>>> data["DirRange"]
array([ 0, 15, 30, 45, 60, 75, 90, 105, 120, 135, 150, 165, 180,
195, 210, 225, 240, 255, 270, 285, 300, 315, 330, 345], dtype=uint16)
Notes
-----
* This function should be called instead of direct spio.loadmat as it cures the
problem of not properly recovering python dictionaries from mat files. It calls
the function check keys to cure all entries which are still mat-objects
References
----------
* http://pyhogs.github.io/reading-mat-files.html
* http://stackoverflow.com/questions/7008608/
scipy-io-loadmat-nested-structures-i-e-dictionaries
"""
data = spio.loadmat(filename, struct_as_record=False, squeeze_me=True)
return _check_keys(data)
def _check_keys(dict):
"""
checks if entries in dictionary are mat-objects. If yes
todict is called to change them to nested dictionaries
Parameters
----------
dict
Returns
-------
dict
Dictionary with the complex data structure
"""
for key in dict:
if isinstance(dict[key], spio.matlab.mio5_params.mat_struct):
dict[key] = _todict(dict[key])
return dict
def _todict(matobj):
"""
Convert a matlab object to a dictionary
Parameters
----------
matobj:
A matlab opject
Returns
-------
dict
Dictionary with a matlab object
A recursive function which constructs from matobjects nested dictionaries
"""
dict = {}
for strg in matobj._fieldnames:
elem = matobj.__dict__[strg]
if isinstance(elem, spio.matlab.mio5_params.mat_struct):
dict[strg] = _todict(elem)
else:
dict[strg] = elem
return dict
if __name__ == "__main":
import doctest
doctest.testmod()