""" toolBox - submodule of SLOTH
"""
import sys
import os
import glob
import numpy as np
import datetime
import matplotlib.pyplot as plt
from calendar import monthrange
from . import IO as io
from scipy import ndimage as nd
import geopandas as gpd
import xarray as xr
import pandas as pd
from shapely.geometry import Polygon
import sloth.slothHelper as slothHelper
def calc_catchment(slopex, slopey, x, y):
""" Calculate the catchment area associated with a given outlet pixel
using x- and y-slope data.
This function implements a simple yet efficient algorithm to determine the
catchment area by starting with a list of pixels that initially contains
only the outlet pixel(s). It iteratively processes the pixels in the list,
marking them as part of the catchment and identifying the surrounding pixels
that drain into the current pixel and are not already included in the
catchment. Found pixels are added to the list, and the algorithm continues
until all pixels belonging to the catchment area are discovered.
Parameters
----------
slopex : ndarray
2D slopes in x-direction
slopey : ndarray
2D slopes in y-direction
x : int
index in x-direction to calculate the catchment from
y : int
index in y-direction to calculate the catchment from
Returns
-------
catchment : ndarray
2D ndarray of the same size as slopex/y. 0 = not part of catchment; 1 = part of catchment
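Examples
--------
A minimal synthetic sketch on a 3x3 grid, following the sign convention
implemented below (slopex < 0 drains east, slopex > 0 drains west, zero
slope drains nowhere): only the two pixels next to the outlet at
(x=1, y=0) drain into it, the rest of the grid is flat.
>>> import numpy as np
>>> slopex = np.array([[-0.1, 0.0, 0.1],
...                    [ 0.0, 0.0, 0.0],
...                    [ 0.0, 0.0, 0.0]])
>>> slopey = np.zeros((3, 3))
>>> calc_catchment(slopex, slopey, x=1, y=0)
array([[1., 1., 1.],
       [0., 0., 0.],
       [0., 0., 0.]])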
"""
dims = slopey.shape
nx = dims[1]
# convert slopes to: 'in which 1D index I do drain'
drainx = np.zeros_like(slopex)
drainx = np.where(slopex<0, 1, drainx)
drainx = np.where(slopex>0, -1, drainx)
drainx = np.where(slopex==0, -999, drainx)
drainy = np.zeros_like(slopey)
drainy = np.where(slopey<0, nx, drainy)
drainy = np.where(slopey>0, -nx, drainy)
drainy = np.where(slopey==0, -999, drainy)
# plt.imshow(drainx)
# plt.show()
# FlatDrainX
fdx = drainx.ravel(order='C')
# FlatDrainY
fdy = drainy.ravel(order='C')
# FlatCatchment
fc = np.zeros_like(fdx)
# flat_idx order='C': idx = (y * nx) + x
start = (y * nx ) + x
openEnds = [start]
while openEnds:
# Get one open end
step = openEnds.pop()
# Mark open end as catchment
fc[step] = 1
# GET surrounding Pixel
# North = step - nx; East = step + 1
# West = step - 1; South = step + nx
# NEWS = [step-nx, step+1, step-1, step+nx]
NS = [step-nx, step+nx]
EW = [step+1, step-1]
# CHECK if surrounding pixels drain to step (D2S) and are NOT part of the catchment already.
# Step is the currently handled open end.
try:
NSD2S = [ idx for idx in NS if (idx + fdy[idx] == step and not fc[idx] ) ]
EWD2S = [ idx for idx in EW if (idx + fdx[idx] == step and not fc[idx] ) ]
except IndexError:
print('ERROR: index out of bounds')
continue
D2S = NSD2S + EWD2S
# add all found pixels to openEnds
openEnds += D2S
return fc.reshape(dims)
def get_intervalSlice(dates, sliceInterval='month'):
''' Calculate interval slices of a given time-series.
This function calculates interval slices of a given time-series,
aiming to mimic Python's default slice notation `array[start:stop:step]`
to also enable multi-dimensional slicing.
The calculation takes the model's dump interval into account, which is why
this function works for (nearly) every time resolution.
The calculation is based on datetime objects and therefore really
calculates the slices of individual intervals, even if the time-series
does not start or end at the first or last of a given interval.
Use case:
You have a time-series of hourly data points and want to calculate the
monthly mean. This function returns the slices needed to divide your
data into monthly pieces.
Parameters
----------
dates : ndarray
the time-series as datetime objects.
sliceInterval : str
defining the interval. Supported are: 'day', 'month'
Returns
-------
Slices : list
A list of slices, one for each interval found.
Notes
-----
This function assumes:
i) The passed time-series covers at least one interval!
ii) At least hourly steps / dumpIntervals!
iii) No seconds in dates - model output is on full minutes at least!
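Examples
--------
A minimal sketch with hourly time-steps covering two full days (the
printed progress information of the function is omitted here):
>>> import datetime
>>> import numpy as np
>>> dates = np.array([datetime.datetime(2023, 1, 1) + datetime.timedelta(hours=h)
...                   for h in range(48)])
>>> slices = get_intervalSlice(dates, sliceInterval='day')
>>> slices
[slice(0, 24, None), slice(24, 48, None)]
>>> data = np.arange(48.)
>>> [float(data[s].mean()) for s in slices]
[11.5, 35.5]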
'''
# Check if the passed sliceInterval is supported and exit if not.
supportedSliceInterval = ['day', 'month']
if sliceInterval not in supportedSliceInterval:
print(f'ERROR: sliceInterval "{sliceInterval}" is not supported.')
print(f'--- supported values are: {supportedSliceInterval}')
print(f'--- EXIT program')
return False
print(f'########################################################################')
print(f'#### calculate dumpInterval and check if equal for all data-points')
print(f'########################################################################')
# Calculating dumpInterval
tmp_dumpInterval = np.diff(dates, n=1)
# Check if the dumpInterval is equal for all data-points
# and if so, set the dumpInterval
if not np.all(tmp_dumpInterval == tmp_dumpInterval[0]):
print('ERROR: calculated dumpInterval is not equal for all data-points')
# In case of error: break function
return False
else:
dumpInterval = tmp_dumpInterval[0]
print(f'DONE --> DumpInterval: {dumpInterval}')
# Find the first time-step of an interval by looping over the time-series and
# checking for each time-step if it is the first of the interval (break loop if found).
print(f'########################################################################')
print(f'#### Finding first of month')
print(f'########################################################################')
Offset = 0
while True:
# verify first date is first of interval.
# idea explained with sliceInterval='month':
# the first step (or the first step - 0.5*dumpInterval) is the first of the month at 00 UTC.
# By checking the exact time and 0.5*dumpInterval before it, we take into account that
# the time-axis of averaged model output might have been shifted in between the
# time-bounds.
print(f'Offset: {Offset}')
tmp_first = dates[Offset]
if sliceInterval == 'day':
# using midnight as reference
tmp_first_ref = tmp_first.replace(hour=0, minute=0, second=0)
elif sliceInterval == 'month':
# using the first of current month at midnight as reference
tmp_first_ref = tmp_first.replace(day=1, hour=0, minute=0, second=0)
# The first time-step of dates is already the first of an interval
if (tmp_first == tmp_first_ref):
print(f'check step {Offset} is first of a month at midnight')
break
#if (tmp_first - dumpInterval) == tmp_first_ref:
# print(f'check step {Offset} is first of a month at midnight')
# break
# The first time-step is not the exact first of an interval but set in the
# middle of the time-bounds and therefore half a dumpInterval away from
# the first date of an interval
elif (tmp_first - (0.5*dumpInterval)) == tmp_first_ref:
print(f'check step {Offset} is first of a month at midnight')
break
else:
print(f'ERROR: step {Offset} is not first step of month at midnight!')
print(f'step: {tmp_first}')
Offset += 1
# hard break of loop if no 'beginning' is found after 50 time-steps
if Offset > 50:
break
continue
# Calculating the slices by looping over all time-steps and checking if the
# next time-step belongs to the next interval.
# NWR 20210422:
# there should be some clever and short solution for below loop ...!
# now that we know the dumpInterval and first step is first of interval...
print(f'########################################################################')
print(f'#### getting month series / slices')
print(f'########################################################################')
t_lower = Offset
Slices = []
for t in range(Offset, dates.shape[0]):
# I decided to go for the solution checking the current and the next interval
# to catch the case that the dataset contains one interval only!
if sliceInterval == 'day':
currInterval = dates[t].day
nextInterval = (dates[t] + dumpInterval).day
elif sliceInterval == 'month':
currInterval = dates[t].month
nextInterval = (dates[t] + dumpInterval).month
if nextInterval != currInterval:
Slices.append(slice(t_lower,t+1,None))
t_lower = t+1
return Slices
def spher_dist_v1(lon1, lat1, lon2, lat2, Rearth=6373):
""" calculate the spherical / haversine distance
This function calculates the real distance (in [km]) between two points on
earth given in lat/lon coordinates.
Parameters
----------
lon1 : ndarray
Longitude of the first point in [rad]. Can be of any dimension.
lat1 : ndarray
Latitude of the first point in [rad]. Can be of any dimension.
lon2 : ndarray
Longitude of the second point in [rad]. Can be of any dimension.
lat2 : ndarray
Latitude of the second point in [rad]. Can be of any dimension.
Rearth : int or float
The earth radius in [km].
Returns
-------
ndarray
The distance is returned in [km]
Notes
-----
Source: https://www.kompf.de/gps/distcalc.html
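Examples
--------
A minimal sketch; note that the coordinates have to be passed in [rad]
already. Two points on the equator, a quarter turn of the earth apart:
>>> import numpy as np
>>> round(float(spher_dist_v1(0.0, 0.0, np.pi/2, 0.0, Rearth=6373)), 1)
10010.7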
"""
term1 = np.sin(lat1) * np.sin(lat2)
term2 = np.cos(lat1) * np.cos(lat2)
term3 = np.cos(lon2 - lon1)
return Rearth * np.arccos(term1+term2*term3)
def find_nearest_Index_2D(point, coord2D):
"""
Find the nearest index in a 2D array.
Given a 2D array `coord2D` and a specific `point` value, this function returns
the index of the 2D array that holds the closest values to the `point`.
Parameters
----------
point : scalar
The value for which to find the nearest index in the 2D array.
coord2D : ndarray
The 2D array in which to search for the nearest index.
Returns
-------
(int, int)
The index in the first and second dimensions of `coord2D` that holds the
closest values to the `point`.
Notes
-----
This function calculates the absolute differences between `coord2D` and `point`
using `np.abs()`. It then determines the index of the minimum value in the
differences array using `np.argmin()`. The resulting flattened index is converted
into a tuple of indices representing the original shape of `coord2D` using
`np.unravel_index()`.
Examples
--------
>>> import numpy as np
>>> arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
>>> find_nearest_Index_2D(5, arr)
(1, 1)
>>> arr = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]])
>>> find_nearest_Index_2D(0.25, arr)
(0, 1)
"""
dist = np.abs(coord2D - point)
idx = np.unravel_index(np.argmin(dist, axis=None),dist.shape)
return idx[0], idx[1]
def plusOneMonth(currDate):
"""
Return the passed date incremented by one month.
This function provides a simple way to calculate a date that is one month ahead
of the given `currDate`. Unlike the bash command-line tool `date`, which can
handle this calculation easily, Python's `timedelta` objects are based on days
or seconds and do not know calendar months, requiring special treatment due to
varying month lengths. This function serves as a wrapper to handle this task in a clean
manner, facilitating usage in other code snippets.
Parameters
----------
currDate : datetime
An arbitrary date.
Returns
-------
datetime
The passed date incremented by one month.
Notes
-----
This function determines the number of days in the month of `currDate` using
`monthrange()` from the `calendar` module. It then returns `currDate` incremented
by a timedelta calculated as `24 * num_days` hours. This ensures that the returned
date correctly reflects one month ahead while accounting for varying month lengths.
Examples
--------
>>> import datetime
>>> currDate = datetime.datetime(2023, 5, 15)
>>> plusOneMonth(currDate)
datetime.datetime(2023, 6, 15, 0, 0)
>>> currDate = datetime.datetime(2023, 12, 31)
>>> plusOneMonth(currDate)
datetime.datetime(2024, 1, 31, 0, 0)
"""
num_days = monthrange(currDate.year, currDate.month)[1]
return currDate + datetime.timedelta(hours=24*num_days)
def trunc(values, decs=0):
"""
Truncate a passed float value or floating ndarray.
This function truncates a given float value or floating ndarray by a
specified truncation precision (decimal digits).
Parameters
----------
values : ndarray
A ndarray of any dimension (including scalar) containing float values to be truncated.
decs : int, optional
An integer value defining the truncation precision. It specifies the number of decimal places to keep (default is 0).
Returns
-------
ndarray
A ndarray of the same type as the input, with the specified number of decimal places truncated from the original values.
Examples
--------
>>> import numpy as np
>>> values = np.array([2.12345, 3.45678, 4.56789])
>>> trunc(values, decs=3)
array([2.123, 3.456, 4.567])
>>> value = 2.98765
>>> trunc(value, decs=2)
2.98
"""
return np.trunc(values*10**decs)/(10**decs)
def fill(data, invalid=None, transkargs={}):
"""
Fill invalid data points with nearest neighbor interpolation.
This function replaces invalid data points in the input array (`data`) with the
value of the nearest valid data point. Invalid data points are indicated by the
`invalid` array or, if not provided, by NaN (Not a Number) values in `data`.
Parameters
----------
data : ndarray
An array containing the data to be filled.
invalid : ndarray, optional
A binary array of the same shape as `data`. Data values are replaced where
`invalid` is True. If not provided, invalid data points are identified using
`np.isnan(data)` (default).
transkargs : dict, optional
Additional arguments to pass to the `distance_transform_edt()` function.
Default is an empty dictionary.
Returns
-------
ndarray
A filled ndarray, where invalid data points have been replaced by the value
of the nearest valid data point.
Notes
-----
This function uses nearest neighbor interpolation to fill invalid data points in
the input array. It calculates the Euclidean distance transform of the `invalid`
array to find the indices of the nearest valid data points. The original `data`
array is then updated with the values from the nearest valid data points.
If the shapes of `data` and `invalid` are not equal, an error is raised, as filling
is not possible.
See also: https://stackoverflow.com/questions/5551286/filling-gaps-in-a-numpy-array
Example
-------
>>> import numpy as np
>>> data = np.array([1.0, 2.0, np.nan, 4.0, np.nan])
>>> fill(data)
array([1., 2., 4., 4., 4.])
>>> invalid = np.isnan(data)
>>> fill(data, invalid)
array([1., 2., 4., 4., 4.])
"""
# if no invalid-mask is passed, use NaN as marker for invalid data points
if invalid is None: invalid = np.isnan(data)
# check if data and invalid shape are equal, as otherwise filling is not
# possible
if data.shape != invalid.shape:
print(f'ERROR: data.shape {data.shape} != invalid.shape {invalid.shape}')
return None
ind = nd.distance_transform_edt(invalid,
return_distances=False,
return_indices=True,
**transkargs)
return data[tuple(ind)]
def get_prudenceMask(lat2D, lon2D, prudName):
"""
Return a Prudence mask based on latitude and longitude values.
This function generates a boolean mask array where True represents masked areas
and False represents non-masked areas. The mask is determined based on a set of
latitude and longitude values and the name of the Prudence region.
Parameters
----------
lat2D : ndarray
2D array containing latitude information for each pixel.
lon2D : ndarray
2D array containing longitude information for each pixel.
prudName : str
Short name of the Prudence region.
Returns
-------
prudMask : ndarray
Boolean ndarray of the same shape as `lat2D`, indicating masked and
non-masked areas.
True = masked; False = not masked.
Notes
-----
The Prudence mask is created based on specific latitude and longitude ranges
for each Prudence region. The function checks the `prudName` parameter and
generates the corresponding mask using NumPy's `np.where()` function.
The available Prudence region names and their corresponding latitude and
longitude ranges are as follows (True is masked, False is not masked):
- 'BI': Latitude: <50.0 or >59.0, Longitude: <-10.0 or >2.0
- 'IP': Latitude: <36.0 or >44.0, Longitude: <-10.0 or >3.0
- 'FR': Latitude: <44.0 or >50.0, Longitude: <-5.0 or >5.0
- 'ME': Latitude: <48.0 or >55.0, Longitude: <2.0 or >16.0
- 'SC': Latitude: <55.0 or >70.0, Longitude: <5.0 or >30.0
- 'AL': Latitude: <44.0 or >48.0, Longitude: <5.0 or >15.0
- 'MD': Latitude: <36.0 or >44.0, Longitude: <3.0 or >25.0
- 'EA': Latitude: <44.0 or >55.0, Longitude: <16.0 or >30.0
If an unsupported Prudence region name is provided, an error message is printed,
and the program exits.
For reference see also: http://prudence.dmi.dk/public/publications/PSICC/Christensen&Christensen.pdf p.38
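Examples
--------
A minimal sketch with two illustrative coordinates; the first point lies
inside the Alps ('AL') region, the second one outside of it:
>>> import numpy as np
>>> lat2D = np.array([[46.0, 52.0]])
>>> lon2D = np.array([[10.0, 10.0]])
>>> get_prudenceMask(lat2D, lon2D, 'AL').tolist()
[[False, True]]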
"""
if (prudName=='BI'):
prudMask = np.where((lat2D < 50.0) | (lat2D > 59.0) | (lon2D < -10.0) | (lon2D > 2.0), True, False)
elif (prudName=='IP'):
prudMask = np.where((lat2D < 36.0) | (lat2D > 44.0) | (lon2D < -10.0) | (lon2D > 3.0), True, False)
elif (prudName=='FR'):
prudMask = np.where((lat2D < 44.0) | (lat2D > 50.0) | (lon2D < -5.0) | (lon2D > 5.0), True, False)
elif (prudName=='ME'):
prudMask = np.where((lat2D < 48.0) | (lat2D > 55.0) | (lon2D < 2.0) | (lon2D > 16.0), True, False)
elif (prudName=='SC'):
prudMask = np.where((lat2D < 55.0) | (lat2D > 70.0) | (lon2D < 5.0) | (lon2D > 30.0), True, False)
elif (prudName=='AL'):
prudMask = np.where((lat2D < 44.0) | (lat2D > 48.0) | (lon2D < 5.0) | (lon2D > 15.0), True, False)
elif (prudName=='MD'):
prudMask = np.where((lat2D < 36.0) | (lat2D > 44.0) | (lon2D < 3.0) | (lon2D > 25.0), True, False)
elif (prudName=='EA'):
prudMask = np.where((lat2D < 44.0) | (lat2D > 55.0) | (lon2D < 16.0) | (lon2D > 30.0), True, False)
else:
print(f'Prudence region {prudName} not found --> EXIT')
sys.exit(1)
return prudMask
def stampLLSM(data, invalid, LLSM, LLSMThreshold=2):
""" stamps a LLSM to passed data
Some times its needed to force different datasets to use the same
Land-Lake-Sea-Mask (LLSM). One example is combining different data sets to
force a model with. If the datasets are taken fomr different sources, most
propably the used LLSM is different too. This, for exmple, could lead to the
situation along the coastline that one dataset is indicating water (ocean)
while the other is indicating land. This inconsistency has to be fixe,
especially within the realm of coupled models.
To achive this, this function is\n
i) masking invalid pixels within the passed data set
ii) interpolate remaining data over the maske regions
iii) set pixel to water value according to a passed LLSM
This guaranthees each land pixel is holding land informtion and each
water pixel is marked as water.
Parameters
----------
data : ndarray
A 2D nd array holding the datset to stamp the LLSM
invalid : scalar
A scalar witht the value indicating invalid pixel
LLSM : ndarray
A ndarray of the same shape as data holding the LLSM
(Land=2 Lake=1 Sea=0)
Returns
-------
ndarray
A (MASKED!) ndarray of the same shape as data, with stamped LLSM
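Examples
--------
A minimal sketch on a 2x2 field with illustrative values: the invalid
land pixel is filled from its neighbours, and the sea pixel (LLSM < 2)
ends up masked:
>>> import numpy as np
>>> data = np.array([[1.0, -9999.0], [3.0, 4.0]])
>>> LLSM = np.array([[2, 2], [2, 0]])
>>> out = stampLLSM(data, invalid=-9999.0, LLSM=LLSM)
>>> out.mask.tolist()
[[False, False], [False, True]]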
"""
# some checks to verify we can progress
if not isinstance(data, np.ndarray):
print(f'data is of type {type(data)} but <class "numpy.ndarray"> is required!')
sys.exit(1)
if not data.ndim == 2:
print(f'data is of dimension {data.ndim} but dimension 2 is required!')
sys.exit(1)
# i)
out = np.ma.masked_where(data==invalid, data)
# ii)
out = fill(data=out, invalid=out.mask)
# iii)
out = np.ma.masked_where((LLSM < LLSMThreshold), out)
return out
def mapDataRange_lin(X, y_min=0, y_max=1,
x_min=None, x_max=None,
cutMinMax=False):
"""Map a source data range linearly to a target data range.
Perform a linear mapping of a source data range (X) to a target data range (Y). The
function calculates the mapping using the formula: y = (y_max - y_min) / (x_max - x_min) * (x - x_min) + y_min.
Parameters
----------
X : ndarray
Source data range to remap.
y_min : scalar, optional
Minimum value of the target data range (default is 0).
y_max : scalar, optional
Maximum value of the target data range (default is 1).
x_min : scalar, optional
Minimum value of the source data range. If not provided, it is calculated based on X.
x_max : scalar, optional
Maximum value of the source data range. If not provided, it is calculated based on X.
cutMinMax : bool, optional
If True, cut the mapped values outside the target data range to the minimum and maximum values.
Returns
-------
ndarray
The target data range after linear mapping.
Notes
-----
This function is useful for mapping a source data range to a different target data range. It can be
used for various purposes, such as normalizing data ranges to facilitate comparison with other ranges.
As an intermediate step, both data ranges (X and Y) are first transformed
into data ranges starting from zero (X' and Y'), as those can easily be
mapped with
i) y' = y'_max / x'_max * x'
Substituting
ii) y' = y - y_min AND x' = x - x_min
into i) yields the mapping used above:
iii) y = (y_max - y_min) / (x_max - x_min) * (x - x_min) + y_min
Examples
--------
>>> X = np.array([0, 1, 2, 3, 4, 5])
>>> mapped = mapDataRange_lin(X, y_min=10, y_max=20)
>>> print(mapped)
[10. 12. 14. 16. 18. 20.]
>>> X = np.array([100, 200, 300, 400, 500])
>>> mapped = mapDataRange_lin(X, y_min=-1, y_max=1, x_min=100, x_max=500)
>>> print(mapped)
[-1. -0.5 0. 0.5 1. ]
"""
# Calculate x_min and x_max if not passed
if x_min is None:
x_min = np.min(X)
if x_max is None:
x_max = np.max(X)
# Map X to new data range
Y = (y_max-y_min) / (x_max-x_min) * (X-x_min) + y_min
# Cut Y for min and max values if wanted
if cutMinMax:
Y = np.where(Y<=y_min, y_min, Y)
Y = np.where(Y>=y_max, y_max, Y)
return Y
def intersection_calculations(df_data, corners, area_of_interest, Name_area, crs_utm, nr_yr, nr_entries, save_dir):
""" Calculate spatial mean values for a shapefile in interest.
This function calculates spatial mean values (example for a specific
region or province) taking into account how much (ratio) of the
gridpoint is actually being intersected inside our region of interest.
Parameters
-------------
df_data: dataframe
dataframe containing infromation about each gridpoint
corners: dataframe (can also be a nectdf or any other data type)
containing infromation on the four longitudes and latitudes
that surrounds each gridpoint in the source dataframe
area_of_interest: a shapefile or geodataframe
shapefile of the area of interest
Name_area: the field name in the shapefile, that the dissolve will be based on
crs_utm: projected coordinate reference system (utm)
nr_yr: variable
number of years of interest
nr_entries:variable
number of hours/days/or months etc..
save_dir: path
path for saving the output
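Example
-------
A hypothetical call; all file names, the field name and the CRS below
are placeholders and have to be adapted to the data at hand:
>>> intersection_calculations(
...     df_data='gridpoint_values.csv', corners='grid_corners.nc',
...     area_of_interest='regions.shp', Name_area='NAME',
...     crs_utm='EPSG:25832', nr_yr=1, nr_entries=12,
...     save_dir='regional_means.shp')   # doctest: +SKIP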
"""
# create a geodataframe from the dataframe that we want to work with
df_data = pd.read_csv(df_data)
print("creating a geodataframe of the given dataframe")
gdf_data = gpd.GeoDataFrame(df_data, geometry=gpd.points_from_xy(df_data.lon,
df_data.lat))
ds_corners = xr.open_dataset(corners)
# convert the netcdf to dataframe in order to be able
# to convert it to geopandas and create polygons
df_corners = ds_corners.to_dataframe()
df_corners = df_corners.reset_index()
df_corners = df_corners.iloc[:, 4:]
# create a list of the four pairs (lon,lat) that creates the polygons
print("creating a list of four pairs (lon,lat) to create polygons")
list_poly = []
polygon_geom = list(zip(df_corners.grid_corner_lon, df_corners.grid_corner_lat))
for i in range(0, len(polygon_geom), 4):
list_poly.append(Polygon(polygon_geom[i:i + 4]))
# create a geodataframe from the polygon list that was created
print("creating a geodataframe with the polygons")
gdf_polygons = gpd.GeoDataFrame(geometry=list_poly)
# for some reason the polygons were doubled so a drop_duplicates() was used
gdf_polygons = gdf_polygons.drop_duplicates()
# if the two geodataframes do not have a coordinate system,
# we have to set it first before going further
gdf_data_wgs = gdf_data.set_crs('EPSG:4326') # setting the crs to WGS84
# in order to calculate the area later, it is needed to convert
# the geodataframe to UTM
gdf_data_utm32N = gdf_data_wgs.to_crs(crs_utm)
gdf_polygons_wgs = gdf_polygons.set_crs('EPSG:4326') # setting the crs to WGS84
gdf_polygons_utm32N = gdf_polygons_wgs.to_crs(crs_utm)
print("Geodataframes are created")
# calculate the area of the polygons.
# it is important for calculating the ratio (weight) of the area of
# each polygon intersected
gdf_polygons_utm32N['area'] = gdf_polygons_utm32N.area
# after calculating the area, in some cases (when calculating for a whole country)
# the area of the polygons will differ; for example, when comparing cities along
# the meridian lines there will be a slight difference in the area
# joining the two geodataframes using sjoin, in order to join the dataframe
# that has the values with its corresponding polygons
print("Joining the two geodataframes")
join_within_right_gdf_utm32N = gdf_data_utm32N.sjoin(gdf_polygons_utm32N, how="right", predicate="within")
shapefile_gdf = gpd.read_file(area_of_interest)
print(shapefile_gdf.crs) # to check whether it has the same crs as the geodataframe that we created
# if not convert it to utm (to be able to calculate the area)
print("intersecting")
overlay_gdf = gpd.overlay(join_within_right_gdf_utm32N, shapefile_gdf, how='intersection')
overlay_gdf['area_intersected'] = overlay_gdf.area
# the weight calculated below represents how much area from each polygon
# is inside the shapefile after intersection
overlay_gdf['weight'] = overlay_gdf['area_intersected']/overlay_gdf['area']
# after this step we should have a geodataframe for the intersected polygons
# that are inside the shapefile of interest; the geodataframe will have
# the lon,lat, with the corresponding values for each date (each date in a column),
# area of the polygon before intersection, area after intersection,
# and the ratio (area_intersected/area) and it can also include the names of the
# regions (in order to be able to dissolve it into mean values for each region)
# in the next step, a dataframe was created to add the values of each
# polygon (gridpoint) after calculating its weight inside
# the intersection for each date
# nr_entries: e.g. number of dates (hourly, daily, monthly, etc.)
nr_column = nr_yr*nr_entries
df_data_inter = overlay_gdf.iloc[:,3:nr_column+2].multiply(overlay_gdf['weight'], axis="index")
# copy the geometry column (important for creating a geodataframe) and other
# relevant data to the new dataframe
# like the name of the regions (important for calculating the mean values for each region)
df_data_inter[Name_area] = overlay_gdf[Name_area] # this should be adapted depending on the shapefile available
df_data_inter['geometry'] = overlay_gdf['geometry']
geometry = df_data_inter['geometry']
gdf_data_inter = gpd.GeoDataFrame(df_data_inter, crs=crs_utm, geometry=geometry)
print("calculating the mean value for each region of interest")
dissolve_gdf = gdf_data_inter.dissolve(by=Name_area, aggfunc='mean', as_index=True, level=None, sort=True, observed=False, dropna=True)
dissolve_gdf_wgs = dissolve_gdf.to_crs('EPSG:4326') # convert back to wgs 1984
print("saving as a shapefile")
dissolve_gdf_wgs.to_file(save_dir)
if __name__ == '__main__':
print("I'm there!")