import datetime
import pandas as pd
from .definitions import COL
[docs]
def delta2min(time_delta):
    """
    Convert a time span to whole minutes.

    Args:
        time_delta (pandas.Timedelta | pandas.DateOffset): time span to convert.
            Any object with a ``total_seconds()`` method is processed as well.

    Returns:
        int: the time span in minutes, truncated towards zero
    """
    delta = time_delta
    if isinstance(delta, pd.DateOffset):
        try:
            delta = pd.Timedelta(delta)
        except ValueError:
            # pd.Timedelta rejected this offset: use the timedelta stored
            # on the offset object itself (private pandas attribute)
            delta = delta._pd_timedelta

    seconds = delta.total_seconds()
    return int(seconds / 60)
[docs]
def minutes_readable(minutes):
    """
    Format a duration given in minutes as a short text with a fitting unit.

    Durations up to one hour are written in minutes ('min'), longer ones below
    one day in hours ('h') and everything else in days ('d'). Hours and days
    get one decimal place only when the value is not a whole number.

    Args:
        minutes (float | int): duration in minutes

    Returns:
        str: the formatted duration, e.g. '45 min', '1.5 h' or '2 d'
    """
    minutes_per_hour = 60
    minutes_per_day = 24 * minutes_per_hour

    if minutes <= minutes_per_hour:
        return f'{minutes:0.0f} min'

    if minutes_per_hour < minutes < minutes_per_day:
        value, unit = minutes / minutes_per_hour, 'h'
    else:
        value, unit = minutes / minutes_per_day, 'd'

    # show a decimal place only if there is a fractional part
    decimals = 1 if value % 1 else 0
    return f'{value:0.{decimals}f} {unit}'
[docs]
def duration_steps_readable(durations):
    """
    Format every duration step as a readable text.

    Each entry is passed through :func:`minutes_readable`.

    Args:
        durations (list[int | float]): duration steps in minutes

    Returns:
        list[str]: the formatted duration steps, in the same order
    """
    return list(map(minutes_readable, durations))
[docs]
def height2rate(height_of_rainfall, duration):
    """
    Convert a height of rainfall to the specific rain flow rate in [l/(s*ha)].

    If two array-like parameters are given, the calculation is made
    element-wise, so both arrays must have the same length.

    Args:
        height_of_rainfall (float | np.ndarray | pd.Series): in [mm]
        duration (float | np.ndarray | pd.Series): in minutes

    Returns:
        float | np.ndarray | pd.Series: specific rain flow rate in [l/(s*ha)]
    """
    # 1 mm/min = 10000 l/ha per 60 s = 1000/6 l/(s*ha)
    mm_per_min_to_rate = 1000 / 6
    height_per_minute = height_of_rainfall / duration
    return height_per_minute * mm_per_min_to_rate
[docs]
def rate2height(rain_flow_rate, duration):
    """
    Convert a specific rain flow rate to the height of rainfall in [mm].

    If two array-like parameters are given, the calculation is made
    element-wise, so both arrays must have the same length.

    Args:
        rain_flow_rate (float | np.ndarray | pd.Series): in [l/(s*ha)]
        duration (float | np.ndarray | pd.Series): in minutes

    Returns:
        float | np.ndarray | pd.Series: height of rainfall in [mm]
    """
    # 1 mm/min = 10000 l/ha per 60 s = 1000/6 l/(s*ha)
    mm_per_min_to_rate = 1000 / 6
    rate_times_minutes = rain_flow_rate * duration
    return rate_times_minutes / mm_per_min_to_rate
def get_progress_bar(iterator, desc=None):
    """
    Wrap an iterable in a tqdm progress bar when tqdm is usable.

    tqdm is an optional dependency. If it cannot be imported, or its
    frontend cannot be created because of a missing import, the iterable
    is returned unchanged so callers can loop over the result either way.

    Args:
        iterator (Iterable): the iterable to loop over
        desc (str, optional): label shown in front of the progress bar

    Returns:
        Iterable: a tqdm progress bar wrapping ``iterator``, or ``iterator`` itself
    """
    try:
        from tqdm.auto import tqdm

        # The construction stays inside the try on purpose: tqdm's notebook
        # frontend raises a plain ImportError here when ipywidgets is missing.
        return tqdm(iterator, desc=desc)
    except ImportError:
        # ImportError also covers ModuleNotFoundError (tqdm not installed)
        return iterator
def frame_looper(size, columns, label='return periods'):
    """
    Return ``columns``, wrapped in a progress bar for large data sets.

    Args:
        size (int): size of the data to process; a progress bar is used
            above 30000 (roughly three weeks, presumably of minute steps)
        columns (Iterable): the columns to loop over
        label (str, optional): name of the calculated data used in the
            description of the progress bar

    Returns:
        Iterable: ``columns`` itself or a progress bar wrapping it
    """
    show_progress = size > 30000  # > 3 weeks
    if not show_progress:
        return columns
    return get_progress_bar(columns, desc=f'calculating {label} data-frame')
[docs]
def event_caption(event, unit='mm', lang='en'):
"""
Build a multi-line, human-readable caption for a rain event in English or German.

The caption starts with the event label and is extended line by line with
whichever of the keys listed below are found in ``event``.

Args:
event (dict or pandas.Series): event details. Keys that are read:
- COL.START / COL.END: start and end time of the event. Only used when both
are present; the values must support ``strftime`` and date format specs
(e.g. pandas.Timestamp).
- COL.LP: rainfall sum, written with one decimal place followed by ``unit`` (optional).
- COL.DUR: duration of the event, rendered by ``timedelta_readable`` (optional).
- COL.MAX_PERIOD: maximum return period, rendered by ``return_period_formatter`` (optional).
- COL.MAX_PERIOD_DURATION: duration step of the maximum return period in minutes,
rendered by ``minutes_readable`` (optional).
unit (str, optional): unit label written after the rainfall sum (default: 'mm').
lang (str, optional): 'de' for German; every other value gives the English caption (default: 'en').

Returns:
str: the caption, with its lines separated by newline characters.

Note:
- Date parts of the end time that repeat the start time are left out
(same day: time only; same month: day and time; German additionally
drops the year when only the month differs).
- NOTE(review): ``return_period_formatter`` is neither defined nor imported in the
visible part of this file - confirm that it is in scope.

Example:
Given an event with:
- COL.START = pd.Timestamp('2023-01-01 12:00')
- COL.END = pd.Timestamp('2023-01-01 14:00')
- COL.LP = 15.5
- COL.DUR = pd.Timedelta(hours=2)
- COL.MAX_PERIOD = 10
- COL.MAX_PERIOD_DURATION = 30
The german output consists of these lines (the text for the return period
value comes from ``return_period_formatter`` and is shown as <T>):
"Regenereignis
am 01.01.2023 von 12:00 bis 14:00
mit einer Regensumme von 15.5 mm
und einer Dauer von 2 Stunden.
Die maximale Wiederkehrperiode war <T> a
bei einer Dauerstufe von 30 min."
The english output consists of these lines:
"rain event
between 2023-01-01 12:00 and 14:00
with a total sum of 15.5 mm
and a duration of 2 hours.
The maximum return period was <T> years
at a duration of 30 min."
"""
# text fragments per language
captions = {
'en': {
'event': 'rain event',
'with_total': 'with a total sum of',
'and_duration': 'and a duration of',
'max_period': 'The maximum return period was',
'at_duration': 'at a duration of',
'years': 'years'
},
'de': {
'event': 'Regenereignis',
'with_total': 'mit einer Regensumme von',
'and_duration': 'und einer Dauer von',
'max_period': 'Die maximale Wiederkehrperiode war',
'at_duration': 'bei einer Dauerstufe von',
'years': 'a'
}
}
# unknown languages fall back to the English text fragments
cap = captions.get(lang, captions['en'])
caption = f"{cap['event']}\n"
# time range of the event: needs both the start and the end time
if (COL.START in event) and (COL.END in event):
start = event[COL.START]
end = event[COL.END]
if lang == 'de':
if start.date() == end.date():
# both timestamps are on the same day
caption += f"am {start.strftime('%d.%m.%Y von %H:%M')} bis {end.strftime('%H:%M')}\n"
elif start.year == end.year:
# both timestamps are in the same year
if start.month == end.month:
# both timestamps are in the same year and month
caption += f"von {start.strftime('%d.%m.%Y %H:%M')} bis {end.strftime('%d. %H:%M')}\n"
else:
caption += f"von {start.strftime('%d.%m.%Y %H:%M')} bis {end.strftime('%d.%m. %H:%M')}\n"
else:
# different years
caption += f"von {start.strftime('%d.%m.%Y %H:%M')} bis {end.strftime('%d.%m.%Y %H:%M')}\n"
else:
caption += f'between {start:%Y-%m-%d %H:%M} and '
if f'{start:%Y-%m-%d}' == f'{end:%Y-%m-%d}':
# same day
caption += f'{end:%H:%M}\n'
elif f'{start:%Y-%m-}' == f'{end:%Y-%m-}':
# same month
caption += f'{end:%d %H:%M}\n'
else:
# different month or year
caption += f'{end:%Y-%m-%d %H:%M}\n'
# rainfall sum with one decimal place
if COL.LP in event:
caption += f"{cap['with_total']} {event[COL.LP]:0.1f} {unit}\n"
# duration of the event in words
if COL.DUR in event:
caption += f"{cap['and_duration']} {timedelta_readable(event[COL.DUR], lang=lang)}.\n"
# maximum return period and the duration step it occurred at
if COL.MAX_PERIOD in event:
caption += f"{cap['max_period']} {return_period_formatter(event[COL.MAX_PERIOD])} {cap['years']}\n"
if COL.MAX_PERIOD_DURATION in event:
caption += f"{cap['at_duration']} {minutes_readable(event[COL.MAX_PERIOD_DURATION])}."
return caption
[docs]
def timedelta_components_plus(td, min_freq='min'):
    """
    Decompose a timedelta into its components, approximating years and weeks.

    The timedelta is rounded to ``min_freq`` first. Its day count is then
    split into years (365 days each, plus one leap day for every four full
    years), weeks (7 days) and remaining days. All other components are
    taken over unchanged from ``pandas.Timedelta.components``.

    Args:
        td (datetime.timedelta or pandas.Timedelta): The time difference to decompose.
        min_freq (str, optional): The minimum frequency for rounding (e.g., 'min', 's'). Defaults to 'min'.

    Returns:
        list: A list of ``[value, unit]`` lists in the order
        [years, weeks, days, hours, minutes, seconds, milliseconds, microseconds, nanoseconds].

    Note:
        Years are only approximated: no calendar is involved, a leap day is
        simply assumed for every fourth full year.
    """
    days_per_year = 365
    days_per_week = 7

    if isinstance(td, datetime.timedelta):
        td = pd.to_timedelta(td)

    list_of_components = []
    for label_component, value in td.round(min_freq).components._asdict().items():
        if label_component == 'days':
            total_days = value
            years = total_days // days_per_year
            # days left after the full years and their leap days (one per four years)
            value = total_days - years * days_per_year - years // 4
            if value < 0:
                # the leap days do not fit into the remainder, so the last
                # year is not complete: borrow it instead of going negative
                years -= 1
                value = total_days - years * days_per_year - years // 4
            weeks, value = divmod(value, days_per_week)
            list_of_components.append([int(years), 'years'])
            list_of_components.append([int(weeks), 'weeks'])
        list_of_components.append([value, label_component])
    return list_of_components
# German translations of the English time-unit labels (plural and singular form of each unit)
time_units = dict(
    years="Jahre", year="Jahr",
    weeks="Wochen", week="Woche",
    days="Tage", day="Tag",
    hours="Stunden", hour="Stunde",
    minutes="Minuten", minute="Minute",
    seconds="Sekunden", second="Sekunde",
    milliseconds="Millisekunden", millisecond="Millisekunde",
    microseconds="Mikrosekunden", microsecond="Mikrosekunde",
    nanoseconds="Nanosekunden", nanosecond="Nanosekunde",
)
[docs]
def timedelta_components_readable(list_of_components, short=False, sep=', ', lang='en'):
    """
    Converts a list of time components into a human-readable string.

    Args:
        list_of_components (list): A list of [value, unit] pairs representing time components.
            The unit labels are expected in their English plural form (e.g. 'days').
        short (bool, optional): If True, each unit is abbreviated to its first letter and
            written directly after the value (e.g. '5h'). Defaults to False.
        sep (str, optional): Separator between components. Defaults to ', '.
        lang (str, optional): Language ('en' for English, 'de' for German, default: 'en').

    Returns:
        str: The formatted components. Unless ``short`` is set, the last component is
        joined with "and" ("und" for German) instead of ``sep``. An empty string is
        returned when no component is greater than zero.

    Note:
        - Components with a value of zero (or less) are left out.
        - Singular units (e.g., "1 year" instead of "1 years") are handled automatically.
        - In short mode minutes, milliseconds and microseconds all abbreviate to 'm'.

    Example:
        timedelta_components_readable([(2, 'days'), (3, 'hours')]) -> '2 days and 3 hours'
    """
    parts = []
    for value, label_component in list_of_components:
        if not value > 0:
            continue
        if short:
            parts.append(f'{value}{label_component[0]}')
            continue
        # drop the plural "s" for a value of exactly one
        unit = label_component[:-1] if value == 1 else label_component
        if lang == 'de':
            unit = time_units.get(unit, unit)
        parts.append(f'{value} {unit}')

    if short or len(parts) < 2:
        return sep.join(parts)

    # Join the last component with the conjunction of the chosen language.
    # Working on the list (not on the joined string) keeps this correct for
    # an empty separator or one that also occurs inside a component.
    conjunction = ' und ' if lang == 'de' else ' and '
    return sep.join(parts[:-1]) + conjunction + parts[-1]
[docs]
def timedelta_readable(td, min_freq='min', short=False, sep=', ', lang='en'):
    """
    Describe a timedelta in words.

    The timedelta is split up by :func:`timedelta_components_plus` and the
    result is formatted by :func:`timedelta_components_readable`.

    Args:
        td (datetime.timedelta or pandas.Timedelta): The time difference to format.
        min_freq (str, optional): The minimum frequency for rounding (e.g., 'min', 's'). Defaults to 'min'.
        short (bool, optional): Whether to use abbreviated unit names (e.g., 'h' for hours). Defaults to False.
        sep (str, optional): Separator used between components in the output string. Defaults to ', '.
        lang (str, optional): Language ('en' for English, 'de' for German, default: 'en').

    Returns:
        str: A formatted string representing the time duration.

    Note:
        Years and weeks are approximations derived from the number of days.

    Example:
        timedelta_readable(pd.Timedelta(days=400, hours=5)) -> '1 year, 5 weeks and 5 hours'
    """
    components = timedelta_components_plus(td, min_freq)
    return timedelta_components_readable(components, short=short, sep=sep, lang=lang)
[docs]
def _replace_year(date, year):
    """Return ``date`` moved to ``year``; 29 February becomes 28 February in non-leap years."""
    try:
        return date.replace(year=year)
    except ValueError:
        # only 29 February does not exist in every year
        return date.replace(year=year, day=28)


def timedelta_readable2(d1, d2, min_freq='min', short=False, sep=', ', lang='en'):
    """
    Computes the difference between two dates and returns a human-readable string.

    For differences of more than 365 days the full years are counted on the
    calendar: ``d2`` is moved back to the first year in which it is not before
    ``d1``, the number of years moved is reported as "years", and only the
    remaining time is split into weeks, days, hours, ...

    Args:
        d1 (datetime-like): The start date (needs ``year`` and ``replace``, e.g. pandas.Timestamp).
        d2 (datetime-like): The end date (needs ``year`` and ``replace``, e.g. pandas.Timestamp).
        min_freq (str, optional): The minimum frequency for rounding (e.g., 'min', 's'). Defaults to 'min'.
        short (bool, optional): Whether to use abbreviated unit names (e.g., 'h' for hours). Defaults to False.
        sep (str, optional): Separator used between components in the output string. Defaults to ', '.
        lang (str, optional): Language ('en' for English, 'de' for German, default: 'en').

    Returns:
        str: A formatted string representing the time difference.

    Note:
        - Months are not reported; the time below one year is given in weeks and days.
        - If ``d2`` is a 29 February, 28 February is used in years without a leap day.

    Example:
        timedelta_readable2(pd.Timestamp('2020-01-01'), pd.Timestamp('2023-06-15'))
        -> '3 years, 23 weeks and 5 days'
    """
    td = d2 - d1
    full_years = 0
    if td > pd.Timedelta(days=365):
        # date of d2 but year of d1
        anchor = _replace_year(d2, d1.year)
        if anchor < d1:
            # d2 lies earlier in the year than d1: use the following year
            anchor = _replace_year(d2, d1.year + 1)
        full_years = d2.year - anchor.year
        td = anchor - d1

    components = timedelta_components_plus(td, min_freq)
    # first entry is the year count; add to it, because the remainder can
    # itself reach a full 365 days when a leap year is involved
    components[0][0] += full_years
    return timedelta_components_readable(components, short=short, sep=sep, lang=lang)