Source code for idf_analysis.little_helpers

import datetime

import pandas as pd

from .definitions import COL



[docs]
def delta2min(time_delta):
    """
    Express a time span as a whole number of minutes.

    Args:
        time_delta (pandas.Timedelta, pandas.DateOffset): the time span to convert

    Returns:
        int: the span in minutes; any fraction of a minute is cut off by ``int()``
    """
    span = time_delta
    if isinstance(span, pd.DateOffset):
        # Offsets are turned into a Timedelta first. When the constructor
        # refuses with a ValueError, the offset's private ``_pd_timedelta``
        # attribute is used instead.
        try:
            span = pd.Timedelta(span)
        except ValueError:
            span = span._pd_timedelta
    seconds = span.total_seconds()
    return int(seconds / 60)




[docs]
def minutes_readable(minutes):
    """
    Render a duration given in minutes with a unit that fits its size.

    Durations up to and including one hour are shown in minutes without
    decimals. Longer durations below one day are shown in hours, everything
    else in days; hours and days get one decimal place only when the value is
    not a whole number.

    Args:
        minutes (float | int): duration in minutes

    Returns:
        str: the duration followed by ``min``, ``h`` or ``d``
    """
    minutes_per_hour = 60
    minutes_per_day = 24 * minutes_per_hour

    if minutes <= minutes_per_hour:
        return f'{minutes:0.0f} min'

    # pick the unit first, the number formatting is shared afterwards
    if minutes < minutes_per_day:
        amount, unit = minutes / minutes_per_hour, 'h'
    else:
        amount, unit = minutes / minutes_per_day, 'd'

    # whole numbers are printed without decimals
    decimals = 1 if amount % 1 else 0
    return f'{amount:0.{decimals}f} {unit}'




[docs]
def duration_steps_readable(durations):
    """
    Render every duration of a sequence as a readable string.

    Each element is passed to :func:`minutes_readable`.

    Args:
        durations (list[int | float]): durations in minutes

    Returns:
        list[str]: the readable durations, in the order of the input
    """
    labels = []
    for minutes in durations:
        labels.append(minutes_readable(minutes))
    return labels




[docs]
def height2rate(height_of_rainfall, duration):
    """
    Convert a height of rainfall into the specific rain flow rate in [l/(s*ha)].

    If two array-like parameters are given, the calculation is made
    element-wise, so both arrays must have the same length.

    Args:
        height_of_rainfall (float | np.ndarray | pd.Series): height of rainfall in [mm]
        duration (float | np.ndarray | pd.Series): duration in minutes

    Returns:
        float | np.ndarray | pd.Series: specific rain flow rate in [l/(s*ha)]
    """
    # factor between mm/min and l/(s*ha)
    mm_per_min_to_rate = 1000 / 6
    return height_of_rainfall / duration * mm_per_min_to_rate




[docs]
def rate2height(rain_flow_rate, duration):
    """
    Convert a specific rain flow rate into the height of rainfall in [mm].

    If two array-like parameters are given, the calculation is made
    element-wise, so both arrays must have the same length.

    Args:
        rain_flow_rate (float | np.ndarray | pd.Series): rain flow rate in [l/(s*ha)]
        duration (float | np.ndarray | pd.Series): duration in minutes

    Returns:
        float | np.ndarray | pd.Series: height of rainfall in [mm]
    """
    # factor between mm/min and l/(s*ha)
    mm_per_min_to_rate = 1000 / 6
    return rain_flow_rate * duration / mm_per_min_to_rate



def get_progress_bar(iterator, desc=None):
    """
    Wrap an iterable in a tqdm progress bar when tqdm can be imported.

    Args:
        iterator: the iterable to loop over
        desc (str, optional): description passed on to tqdm

    Returns:
        the tqdm wrapper around ``iterator``, or ``iterator`` itself when a
        ``ModuleNotFoundError`` is raised (tqdm is an optional dependency)
    """
    try:
        import tqdm.auto as tqdm_auto
        wrapped = tqdm_auto.tqdm(iterator, desc=desc)
    except ModuleNotFoundError:
        wrapped = iterator
    return wrapped


def frame_looper(size, columns, label='return periods'):
    """
    Return ``columns`` for looping, with a progress bar for large inputs.

    Args:
        size (int): size that decides whether a progress bar is used
            (the threshold is 30000; the original author equates that with
            about three weeks, presumably of one-minute data - TODO confirm)
        columns: the iterable to loop over
        label (str): name of the calculated data used in the progress bar text

    Returns:
        ``columns`` unchanged, or the result of :func:`get_progress_bar` for it
    """
    needs_progress_bar = size > 30000
    if not needs_progress_bar:
        return columns
    return get_progress_bar(columns, desc=f'calculating {label} data-frame')



[docs]
def event_caption(event, unit='mm', lang='en', detailed_short_return_period=False):
    """
    Build a multi-line, human-readable caption for a rain event.

    Every piece of information is only added when its key is present in
    ``event``. Each piece ends with a line break, except the final
    duration-step sentence.

    Args:
        event (dict or pandas.Series): event details. Keys that are used:
            - COL.START and COL.END: start and end time (only used when both are present).
            - COL.LP: total rainfall sum (optional).
            - COL.DUR: duration of the event as a timedelta (optional).
            - COL.MAX_PERIOD: maximum return period (optional).
            - COL.MAX_PERIOD_DURATION: duration step of the maximum return period in minutes
              (optional, only used together with COL.MAX_PERIOD).
        unit (str, optional): unit written after the rainfall sum (default: 'mm').
        lang (str, optional): 'de' for German, anything else gives the English wording (default: 'en').
        detailed_short_return_period (bool): passed to :func:`return_period_formatter`
            as ``smaller_detail`` (return periods < 1 formatted as 1/n).

    Returns:
        str: the caption of the rain event.

    Example:
        Given an event with:
            - COL.START = pd.Timestamp('2023-01-01 12:00')
            - COL.END = pd.Timestamp('2023-01-01 14:00')
            - COL.LP = 15.5
            - COL.DUR = pd.Timedelta(hours=2)
            - COL.MAX_PERIOD = 10
            - COL.MAX_PERIOD_DURATION = 30

        The German output is:
            "Regenereignis
             am 01.01.2023 von 12:00 bis 14:00
             mit einer Regensumme von 15.5 mm
             und einer Dauer von 2 Stunden.
             Die maximale Wiederkehrperiode war 10.0 a
             bei einer Dauerstufe von 30 min."

        The English output is:
            "rain event
             between 2023-01-01 12:00 and 14:00
             with a total sum of 15.5 mm
             and a duration of 2 hours.
             The maximum return period was 10.0 years
             at a duration of 30 min."
    """
    phrases_by_lang = {
        'en': {
            'event': 'rain event',
            'with_total': 'with a total sum of',
            'and_duration': 'and a duration of',
            'max_period': 'The maximum return period was',
            'at_duration': 'at a duration of',
            'years': 'years'
        },
        'de': {
            'event': 'Regenereignis',
            'with_total': 'mit einer Regensumme von',
            'and_duration': 'und einer Dauer von',
            'max_period': 'Die maximale Wiederkehrperiode war',
            'at_duration': 'bei einer Dauerstufe von',
            'years': 'a'
        }
    }
    # unknown language codes fall back to the English phrases
    phrases = phrases_by_lang.get(lang, phrases_by_lang['en'])

    # the caption is collected piece by piece and glued together at the end
    pieces = [f"{phrases['event']}\n"]

    if (COL.START in event) and (COL.END in event):
        begin = event[COL.START]
        finish = event[COL.END]

        # the end time is shortened by whatever it shares with the start time
        if lang == 'de':
            lead = 'von'
            begin_fmt = '%d.%m.%Y %H:%M'
            if begin.date() == finish.date():
                # both timestamps are on the same day
                lead = 'am'
                begin_fmt = '%d.%m.%Y von %H:%M'
                finish_fmt = '%H:%M'
            elif begin.year != finish.year:
                # different years
                finish_fmt = '%d.%m.%Y %H:%M'
            elif begin.month == finish.month:
                # same year and same month
                finish_fmt = '%d. %H:%M'
            else:
                # same year, different month
                finish_fmt = '%d.%m. %H:%M'
            pieces.append(f"{lead} {begin.strftime(begin_fmt)} bis {finish.strftime(finish_fmt)}\n")
        else:
            if f'{begin:%Y-%m-%d}' == f'{finish:%Y-%m-%d}':
                # same day
                finish_fmt = '%H:%M'
            elif f'{begin:%Y-%m-}' == f'{finish:%Y-%m-}':
                # same month
                finish_fmt = '%d %H:%M'
            else:
                # different month
                finish_fmt = '%Y-%m-%d %H:%M'
            pieces.append(f'between {begin:%Y-%m-%d %H:%M} and {format(finish, finish_fmt)}\n')

    if COL.LP in event:
        pieces.append(f"{phrases['with_total']} {event[COL.LP]:0.1f} {unit}\n")

    if COL.DUR in event:
        duration_text = timedelta_readable(event[COL.DUR], lang=lang)
        pieces.append(f"{phrases['and_duration']} {duration_text}.\n")

    if COL.MAX_PERIOD in event:
        period_text = return_period_formatter(event[COL.MAX_PERIOD],
                                              smaller_detail=detailed_short_return_period)
        pieces.append(f"{phrases['max_period']} {period_text} {phrases['years']}\n")

        # the duration step is only reported together with the return period
        if COL.MAX_PERIOD_DURATION in event:
            step_text = minutes_readable(event[COL.MAX_PERIOD_DURATION])
            pieces.append(f"{phrases['at_duration']} {step_text}.")

    return ''.join(pieces)




[docs]
def return_period_formatter(t, smaller_detail=False):
    r"""
    Formats a return period value into a human-readable string.

    The function categorizes the return period value into specific ranges and returns a formatted string
    based on the range. This is useful for displaying return periods in a clear and concise manner.

    Args:
        t (float): The return period value to format.
        smaller_detail (bool): Whether a return period of < 1 should be formatted as 1/n.

    Returns:
        str: A formatted string representing the return period. The formatting rules are:
             - If t < 1: Returns "< 1". With ``smaller_detail`` the value is written as "1/n"
               instead, where n is 1/t rounded to an integer. "< 1" is kept when n is 1 or
               when t is not positive (no reciprocal exists).
             - If t > 200: Returns "$\gg$ 100" (indicating much greater than 100).
             - If t > 100: Returns "> 100".
             - Otherwise: Returns the value formatted to one decimal place (e.g., "50.0").

    Example:
        >>> return_period_formatter(0.5)
        '< 1'
        >>> return_period_formatter(150)
        '> 100'
        >>> return_period_formatter(250)
        '$\\gg$ 100'
        >>> return_period_formatter(50.123)
        '50.1'
        >>> return_period_formatter(0.5, smaller_detail=True)
        '1/2'
        >>> return_period_formatter(0, smaller_detail=True)
        '< 1'
    """
    if t < 1:
        # zero would raise a ZeroDivisionError and negative values would give
        # a meaningless "1/-n", so only positive values are written as a fraction
        if smaller_detail and t > 0:
            n = int(round(1 / t, 0))
            if n > 1:
                return f'1/{n}'
        return '< 1'
    if t > 200:
        return '$\\gg$ 100'
    if t > 100:
        return '> 100'
    return f'{t:0.1f}'




[docs]
def timedelta_components_plus(td, min_freq='min'):
    """
    Decomposes a timedelta into its components, approximating years and weeks.

    The span is rounded to ``min_freq`` and split with ``pandas.Timedelta.components``.
    The ``days`` component is then split further into years (365 days), weeks (7 days)
    and the remaining days.

    Args:
        td (datetime.timedelta or pandas.Timedelta): The time difference to decompose.
            Other inputs are converted with ``pandas.to_timedelta``.
        min_freq (str, optional): The minimum frequency for rounding (e.g., 'min', 's'). Defaults to 'min'.

    Returns:
        list: A list of ``[value, unit]`` lists in the order
            [years, weeks, days, hours, minutes, seconds, milliseconds, microseconds, nanoseconds].

    Note:
        Years are only approximated: a year counts as 365 days and one day is deducted from the
        remaining days for every four full years as a rough leap-day correction. The deduction
        never makes the remaining days negative.
    """
    days_per_year = 365
    days_per_week = 7

    if not isinstance(td, pd.Timedelta):
        td = pd.to_timedelta(td)

    list_of_components = []
    for label_component, value in td.round(min_freq).components._asdict().items():
        if label_component == 'days':
            years, value = divmod(value, days_per_year)
            list_of_components.append([int(years), 'years'])

            # rough leap-day correction, clamped at zero: without the clamp a
            # span such as 1460 days ended up as 4 years, -1 weeks and 6 days
            value = max(value - years // 4, 0)

            weeks, value = divmod(value, days_per_week)
            list_of_components.append([int(weeks), 'weeks'])

        list_of_components.append([value, label_component])
    return list_of_components



# Dictionary with English time units as keys and their German translations as values
time_units = {
    "years": "Jahre",
    "year": "Jahr",
    "weeks": "Wochen",
    "week": "Woche",
    "days": "Tage",
    "day": "Tag",
    "hours": "Stunden",
    "hour": "Stunde",
    "minutes": "Minuten",
    "minute": "Minute",
    "seconds": "Sekunden",
    "second": "Sekunde",
    "milliseconds": "Millisekunden",
    "millisecond": "Millisekunde",
    "microseconds": "Mikrosekunden",
    "microsecond": "Mikrosekunde",
    "nanoseconds": "Nanosekunden",
    "nanosecond": "Nanosekunde"
}


def timedelta_components_readable(list_of_components, short=False, sep=', ', lang='en'):
    """
    Converts a list of time components into a human-readable string.

    Args:
        list_of_components (list): A list of [value, unit] pairs representing time components.
            Units are the English plural names (e.g. 'days').
        short (bool, optional): If True, only the first letter of the unit is used and written
            directly after the value (e.g. '2d'). Defaults to False.
        sep (str, optional): Separator between components. Defaults to ', '.
        lang (str, optional): Language ('en' for English, 'de' for German, default: 'en').

    Returns:
        str: A formatted string representing the time components. Components with a value
            of zero or less are left out, so the result may be an empty string.

    Note:
        - Singular units (e.g., "1 year" instead of "1 years") are handled automatically.
        - Unless short mode is enabled, the last two components are joined by "and"
          ("und" for German) instead of the separator.

    Example:
        timedelta_components_readable([(2, 'days'), (3, 'hours')]) -> '2 days and 3 hours'
        timedelta_components_readable([(2, 'days'), (3, 'hours')], lang='de') -> '2 Tage und 3 Stunden'
    """
    german = lang == 'de'
    result = []

    for value, label_component in list_of_components:
        if value > 0:
            if short:
                unit_sep = ''
                unit = label_component[0]
            else:
                unit_sep = ' '
                unit = label_component
                if value == 1:
                    # drop the plural "s"
                    unit = label_component[:-1]
            if german:
                unit = time_units.get(unit, unit)
            result.append(f'{value}{unit_sep}{unit}')

    if short or len(result) < 2:
        return sep.join(result)

    # The last component is attached with the conjunction directly. Splitting
    # the joined string at the separator again would break for separators
    # that also occur inside a component (e.g. ' ') or that are empty.
    conjunction = ' und ' if german else ' and '
    return sep.join(result[:-1]) + conjunction + result[-1]




[docs]
def timedelta_readable(td, min_freq='min', short=False, sep=', ', lang='en'):
    """
    Converts a timedelta into a human-readable string.

    The span is split by :func:`timedelta_components_plus` and the resulting
    components are rendered by :func:`timedelta_components_readable`.

    Args:
        td (datetime.timedelta or pandas.Timedelta): The time difference to format.
        min_freq (str, optional): The minimum frequency for rounding (e.g., 'min', 's'). Defaults to 'min'.
        short (bool, optional): Whether to use abbreviated unit names (e.g., 'h' for hours). Defaults to False.
        sep (str, optional): Separator used between components in the output string. Defaults to ', '.
        lang (str, optional): Language ('en' for English, 'de' for German, default: 'en').

    Returns:
        str: A formatted string representing the time duration.

    Example:
        timedelta_readable(pd.Timedelta(days=400, hours=5)) -> '1 year, 5 weeks and 5 hours'
    """
    components = timedelta_components_plus(td, min_freq)
    return timedelta_components_readable(components, short=short, sep=sep, lang=lang)




[docs]
def _replace_year(moment, year):
    """Return ``moment`` moved to ``year``; 29 February becomes 28 February in non-leap years."""
    try:
        return moment.replace(year=year)
    except ValueError:
        # ``replace`` refuses 29 February in a year that is not a leap year
        return moment.replace(year=year, day=28)


def timedelta_readable2(d1, d2, min_freq='min', short=False, sep=', ', lang='en'):
    """
    Computes the difference between two dates and returns a human-readable string.

    For spans of more than 365 days the full years are counted on the calendar:
    ``d2`` is moved into the year of ``d1`` (or the following year when that would
    place it before ``d1``), the number of years it was moved is taken as the
    year count, and only the remaining span is decomposed into weeks, days, etc.

    Args:
        d1 (datetime-like): The start date.
        d2 (datetime-like): The end date.
        min_freq (str, optional): The minimum frequency for rounding (e.g., 'min', 's'). Defaults to 'min'.
        short (bool, optional): Whether to use abbreviated unit names (e.g., 'h' for hours). Defaults to False.
        sep (str, optional): Separator used between components in the output string. Defaults to ', '.
        lang (str, optional): Language ('en' for English, 'de' for German, default: 'en').

    Returns:
        str: A formatted string representing the time difference.

    Note:
        - The remaining span is measured in the calendar year of ``d1``, so the result
          can be off by a day around leap years.
        - An end date on 29 February is treated as 28 February when it is moved into
          a year that is not a leap year.

    Example:
        timedelta_readable2(pd.Timestamp('2020-01-01'), pd.Timestamp('2023-06-15'))
        -> '3 years, 23 weeks and 5 days'
    """
    td = d2 - d1

    years = None
    if td > pd.Timedelta(days=365):
        # date of d2 but year of d1
        d2_new = _replace_year(d2, d1.year)

        if d2_new < d1:
            # the day of d2 is earlier in the year than the day of d1
            d2_new = _replace_year(d2, d1.year + 1)

        years = d2.year - d2_new.year

        td = d2_new - d1

    components = timedelta_components_plus(td, min_freq)

    if years is not None:
        # The remaining span can itself reach 365 days (when it covers a leap
        # year), so the calendar years are added instead of overwriting.
        components[0][0] += years

    return timedelta_components_readable(components, short=short, sep=sep, lang=lang)