Source code for idf_analysis.event_series_analysis

import warnings

from math import floor, e

import numpy as np
import pandas as pd

from scipy.stats import linregress

from .definitions import PARAM, SERIES, COL
from .sww_utils import year_delta, guess_freq, rain_events, agg_events


[docs] def annual_series(rolling_sum_values, year_index): """ Create an annual series of the maximum overlapping sum per year and calculate the "u" and "w" parameters. acc. to DWA-A 531 chap. 5.1.5 Gumbel distribution | https://en.wikipedia.org/wiki/Gumbel_distribution Args: rolling_sum_values (numpy.ndarray): Array with maximum rolling sum per event per year. year_index (numpy.ndarray): Array with year of the event. Returns: dict[str, float]: Parameter u and w from the annual series for a specific duration step as a tuple. """ annually_series = pd.Series(rolling_sum_values).groupby(year_index).max().values # annually_series = pd.Series(data=rolling_sum_values, # index=events[COL.START].values).resample('AS').max().index annually_series = np.sort(annually_series)[::-1] sample_size = annually_series.size index = np.arange(sample_size) + 1 x = -np.log(np.log((sample_size + 0.2) / (sample_size - index + 0.6))) return _lin_regress(x, annually_series)
def _plotting_formula(k, l, m): """ plotting function acc. to DWA-A 531 chap. 5.1.3 for the partial series Args: k (float): running index l (float): sample size m (float): measurement period Returns: float: estimated empirical return period """ return (l + 0.2) * m / ((k - 0.4) * l)
[docs] def partial_series(rolling_sum_values, measurement_period): """ Create a partial series of the largest overlapping sums and calculate the "u" and "w" parameters. acc. to DWA-A 531 chap. 5.1.4 Exponential distribution | https://en.wikipedia.org/wiki/Exponential_distribution Args: rolling_sum_values (numpy.ndarray): Array with maximum rolling sum per event. measurement_period (float): Measurement period in years. Returns: dict[str, float]: parameter u and w from the partial series for a specific duration step as a tuple """ partially_series = rolling_sum_values partially_series = np.sort(partially_series)[::-1] # Use only the (2-3 multiplied with the number of measuring years) of the biggest # values in the database (-> acc. to ATV-A 121 chap. 4.3; DWA-A 531 chap. 4.4). # As a requirement for the extreme value distribution. threshold_sample_size = int(floor(measurement_period * e)) if partially_series.size < threshold_sample_size: warnings.warn('Fewer events in series than recommended for extreme value analysis. Use the results with mindfulness.') threshold_sample_size = partially_series.size partially_series = partially_series[:threshold_sample_size] sample_size = threshold_sample_size index = np.arange(sample_size) + 1 log_return_periods = np.log(_plotting_formula(index, sample_size, measurement_period)) return _lin_regress(log_return_periods, partially_series)
def _lin_regress2(x, y): # based on math - custom function sample_size = len(x) x_mean = np.mean(x) y_mean = np.mean(y) slope = ((x * y).sum() - sample_size * y_mean * x_mean) / \ ((x ** 2).sum() - sample_size * x_mean ** 2) intercept = y_mean - slope * x_mean return {PARAM.U: intercept, PARAM.W: slope} def _lin_regress(x, y): # using scipy res = linregress(x, y) return {PARAM.U: res.intercept, PARAM.W: res.slope} def _improve_factor(interval): """ correction factor acc. to DWA-A 531 chap. 4.3 Args: interval (float): length of the interval: number of observations per duration Returns: float: correction factor """ improve_factor = {1: 1.14, 2: 1.07, 3: 1.04, 4: 1.03, 5: 1.00, 6: 1.00} return np.interp(interval, list(improve_factor.keys()), list(improve_factor.values()))
[docs] def iter_event_series(file_input, duration_steps): """ Statistical analysis for each duration step. acc. to DWA-A 531 chap. 5.1 Save the parameters of the distribution function as interim results. Args: file_input (pandas.Series): precipitation data duration_steps (list[int] | numpy.ndarray): in minutes Yields: series: max rainfall intensity per event for each duration step """ ts = file_input.copy() # ------------------------------- base_frequency = guess_freq(ts.index) # DateOffset/Timedelta # ------------------------------------------------------------------------------------------------------------------ # acc. to DWA-A 531 chap. 4.2: # The values must be independent of each other for the statistical evaluations. # estimated four hours acc. (Schilling, 1984) # for larger durations - use the duration as minimal gap min_gap_schilling = pd.Timedelta(hours=4) # -------------- # if # use only duration for splitting events # may increase design-rain-height of smaller durations # ------------------------------- try: from tqdm.auto import tqdm pbar = tqdm(duration_steps, desc='Calculating Parameters u and w') except ModuleNotFoundError: pbar = duration_steps for duration_integer in pbar: pbar.set_description('Calculating Parameters u and w for duration {:0.0f}'.format(duration_integer)) duration = pd.Timedelta(minutes=duration_integer) if duration < pd.Timedelta(base_frequency): continue if duration < min_gap_schilling: min_gap = min_gap_schilling else: min_gap = duration events = rain_events(ts, min_gap=min_gap) # Correction factor acc. to DWA-A 531 chap. 4.3 improve = _improve_factor(duration / base_frequency) roll_sum = ts.rolling(duration).sum() year_index = events[COL.START].dt.year.values # events[COL.rolling_sum_valuesAX_OVERLAPPING_SUM] = agg_events(events, roll_sum, 'max') * improve yield agg_events(events, roll_sum, 'max') * improve, duration_integer, year_index
[docs] def calculate_u_w(file_input, duration_steps, series_kind): """ Statistical analysis for each duration step. acc. to DWA-A 531 chap. 5.1 Save the parameters of the distribution function as interim results. acc. to DWA-A 531 chap. 4.4: use the annual series only for measurement periods over 20 years Args: file_input (pandas.Series): precipitation data duration_steps (list[int] | numpy.ndarray): in minutes series_kind (str): 'annual' or 'partial' Returns: dict[int, dict]: with key=durations and values=dict(u, w) """ # ------------------------------- # measuring time in years measurement_start, measurement_end = file_input.index[[0, -1]] measurement_period = (measurement_end - measurement_start) / year_delta(years=1) if round(measurement_period, 1) < 10: warnings.warn("The measurement period is too short. The results may be inaccurate! " "It is recommended to use at least ten years. " "(-> Currently {}a used)".format(measurement_period)) # ------------------------------- interim_results = {} for rolling_sum_values, duration_integer, year_index in iter_event_series(file_input, duration_steps): if series_kind == SERIES.ANNUAL: interim_results[duration_integer] = annual_series(rolling_sum_values, year_index) elif series_kind == SERIES.PARTIAL: interim_results[duration_integer] = partial_series(rolling_sum_values, measurement_period) else: raise NotImplementedError return interim_results