import warnings
from math import floor, e
import numpy as np
import pandas as pd
from scipy.stats import linregress
from .definitions import PARAM, SERIES, COL
from .sww_utils import year_delta, guess_freq, rain_events, agg_events
[docs]
def annual_series(rolling_sum_values, year_index):
"""
Create an annual series of the maximum overlapping sum per year and calculate the "u" and "w" parameters.
acc. to DWA-A 531 chap. 5.1.5
Gumbel distribution | https://en.wikipedia.org/wiki/Gumbel_distribution
Args:
rolling_sum_values (numpy.ndarray): Array with maximum rolling sum per event per year.
year_index (numpy.ndarray): Array with year of the event.
Returns:
dict[str, float]: Parameter u and w from the annual series for a specific duration step as a tuple.
"""
annually_series = pd.Series(rolling_sum_values).groupby(year_index).max().values
# annually_series = pd.Series(data=rolling_sum_values,
# index=events[COL.START].values).resample('AS').max().index
annually_series = np.sort(annually_series)[::-1]
sample_size = annually_series.size
index = np.arange(sample_size) + 1
x = -np.log(np.log((sample_size + 0.2) / (sample_size - index + 0.6)))
return _lin_regress(x, annually_series)
def _plotting_formula(k, l, m):
"""
plotting function acc. to DWA-A 531 chap. 5.1.3 for the partial series
Args:
k (float): running index
l (float): sample size
m (float): measurement period
Returns:
float: estimated empirical return period
"""
return (l + 0.2) * m / ((k - 0.4) * l)
[docs]
def partial_series(rolling_sum_values, measurement_period):
"""
Create a partial series of the largest overlapping sums and calculate the "u" and "w" parameters.
acc. to DWA-A 531 chap. 5.1.4
Exponential distribution | https://en.wikipedia.org/wiki/Exponential_distribution
Args:
rolling_sum_values (numpy.ndarray): Array with maximum rolling sum per event.
measurement_period (float): Measurement period in years.
Returns:
dict[str, float]: parameter u and w from the partial series for a specific duration step as a tuple
"""
partially_series = rolling_sum_values
partially_series = np.sort(partially_series)[::-1]
# Use only the (2-3 multiplied with the number of measuring years) of the biggest
# values in the database (-> acc. to ATV-A 121 chap. 4.3; DWA-A 531 chap. 4.4).
# As a requirement for the extreme value distribution.
threshold_sample_size = int(floor(measurement_period * e))
if partially_series.size < threshold_sample_size:
warnings.warn('Fewer events in series than recommended for extreme value analysis. Use the results with mindfulness.')
threshold_sample_size = partially_series.size
partially_series = partially_series[:threshold_sample_size]
sample_size = threshold_sample_size
index = np.arange(sample_size) + 1
log_return_periods = np.log(_plotting_formula(index, sample_size, measurement_period))
return _lin_regress(log_return_periods, partially_series)
def _lin_regress2(x, y):
# based on math - custom function
sample_size = len(x)
x_mean = np.mean(x)
y_mean = np.mean(y)
slope = ((x * y).sum() - sample_size * y_mean * x_mean) / \
((x ** 2).sum() - sample_size * x_mean ** 2)
intercept = y_mean - slope * x_mean
return {PARAM.U: intercept, PARAM.W: slope}
def _lin_regress(x, y):
# using scipy
res = linregress(x, y)
return {PARAM.U: res.intercept, PARAM.W: res.slope}
def _improve_factor(interval):
"""
correction factor acc. to DWA-A 531 chap. 4.3
Args:
interval (float): length of the interval: number of observations per duration
Returns:
float: correction factor
"""
improve_factor = {1: 1.14,
2: 1.07,
3: 1.04,
4: 1.03,
5: 1.00,
6: 1.00}
return np.interp(interval,
list(improve_factor.keys()),
list(improve_factor.values()))
[docs]
def iter_event_series(file_input, duration_steps):
"""
Statistical analysis for each duration step.
acc. to DWA-A 531 chap. 5.1
Save the parameters of the distribution function as interim results.
Args:
file_input (pandas.Series): precipitation data
duration_steps (list[int] | numpy.ndarray): in minutes
Yields:
series: max rainfall intensity per event for each duration step
"""
ts = file_input.copy()
# -------------------------------
base_frequency = guess_freq(ts.index) # DateOffset/Timedelta
# ------------------------------------------------------------------------------------------------------------------
# acc. to DWA-A 531 chap. 4.2:
# The values must be independent of each other for the statistical evaluations.
# estimated four hours acc. (Schilling, 1984)
# for larger durations - use the duration as minimal gap
min_gap_schilling = pd.Timedelta(hours=4)
# --------------
# if
# use only duration for splitting events
# may increase design-rain-height of smaller durations
# -------------------------------
try:
from tqdm.auto import tqdm
pbar = tqdm(duration_steps, desc='Calculating Parameters u and w')
except ModuleNotFoundError:
pbar = duration_steps
for duration_integer in pbar:
pbar.set_description('Calculating Parameters u and w for duration {:0.0f}'.format(duration_integer))
duration = pd.Timedelta(minutes=duration_integer)
if duration < pd.Timedelta(base_frequency):
continue
if duration < min_gap_schilling:
min_gap = min_gap_schilling
else:
min_gap = duration
events = rain_events(ts, min_gap=min_gap)
# Correction factor acc. to DWA-A 531 chap. 4.3
improve = _improve_factor(duration / base_frequency)
roll_sum = ts.rolling(duration).sum()
year_index = events[COL.START].dt.year.values
# events[COL.rolling_sum_valuesAX_OVERLAPPING_SUM] = agg_events(events, roll_sum, 'max') * improve
yield agg_events(events, roll_sum, 'max') * improve, duration_integer, year_index
[docs]
def calculate_u_w(file_input, duration_steps, series_kind):
"""
Statistical analysis for each duration step.
acc. to DWA-A 531 chap. 5.1
Save the parameters of the distribution function as interim results.
acc. to DWA-A 531 chap. 4.4: use the annual series only for measurement periods over 20 years
Args:
file_input (pandas.Series): precipitation data
duration_steps (list[int] | numpy.ndarray): in minutes
series_kind (str): 'annual' or 'partial'
Returns:
dict[int, dict]: with key=durations and values=dict(u, w)
"""
# -------------------------------
# measuring time in years
measurement_start, measurement_end = file_input.index[[0, -1]]
measurement_period = (measurement_end - measurement_start) / year_delta(years=1)
if round(measurement_period, 1) < 10:
warnings.warn("The measurement period is too short. The results may be inaccurate! "
"It is recommended to use at least ten years. "
"(-> Currently {}a used)".format(measurement_period))
# -------------------------------
interim_results = {}
for rolling_sum_values, duration_integer, year_index in iter_event_series(file_input, duration_steps):
if series_kind == SERIES.ANNUAL:
interim_results[duration_integer] = annual_series(rolling_sum_values, year_index)
elif series_kind == SERIES.PARTIAL:
interim_results[duration_integer] = partial_series(rolling_sum_values, measurement_period)
else:
raise NotImplementedError
return interim_results