Find and analyse drought events#
To analyse drought events we first need to establish baselines. The features we focus on are the duration of each drought and the area it affects. We identify all historical drought events in the region of interest, extract these two metrics for each event, and derive regional baselines from them. These baselines then show whether a given event was anomalous.
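As a minimal sketch of the idea, assuming each event's duration in months is already known, a baseline can be a percentile of the historical distribution, and an event is anomalous when it exceeds that baseline:

import numpy as np

durations = np.array([3, 8, 2, 12, 5, 7])  # hypothetical event durations in months
baseline = np.percentile(durations, 75)    # 75th-percentile baseline
print(durations[durations > baseline])     # durations that are anomalously long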
Import necessary libraries#
import os
import sys
import glob
import datetime
from functools import partial

import numpy as np
import pandas as pd
import xarray as xr
import dask.array as da
import geopandas as gpd
from scipy.ndimage import label, generate_binary_structure

import matplotlib.pyplot as plt
import plotly.graph_objects as go
import hvplot.xarray  # to plot xarray objects with hvplot
import cartopy.crs as ccrs

from pyprojroot import here
root = here()
sys.path.append(str(root / "chapters/shared/"))
from utils.widgets_handler import read_json_to_dict
color_palette_json = 'color_palette_bright.json'
cmap = read_json_to_dict(color_palette_json)
cmap['No Data'] = '#cccccc'
Data loading functions#
def get_spi_dataset(acc_period: int = 1, years: list = [2020]):
    data_root_folder = '/data1/drought_dataset/spi/'
    spi_folder = os.path.join(data_root_folder, f'spi{acc_period}')
    spi_paths = []
    for year in years:
        spi_paths.extend(sorted(glob.glob(
            os.path.join(spi_folder, f'SPI{acc_period}_gamma_global_era5_moda_ref1991to2020_{year}*.nc'))))
    return xr.open_mfdataset(spi_paths, chunks={'time': 'auto'}, concat_dim='time', combine='nested', parallel=False)

def get_spei_dataset(acc_period: int = 1, years: list = [2020]):
    data_root_folder = '/data1/drought_dataset/spei/'
    spei_folder = os.path.join(data_root_folder, f'spei{acc_period}')
    spei_paths = []
    for year in years:
        spei_paths.extend(sorted(glob.glob(
            os.path.join(spei_folder, f'SPEI{acc_period}_genlogistic_global_era5_moda_ref1991to2020_{year}*.nc'))))
    return xr.open_mfdataset(spei_paths, chunks={'time': 'auto'}, concat_dim='time', combine='nested', parallel=False)
def mask_invalid_values(ds, variable, value=-9999):
    # Replace the dataset's fill value with NaN
    ds[variable] = ds[variable].where(ds[variable] != value, np.nan)
    return ds
def subset_region(dataset, variable, bbox):
    """Subset a dataset to a bounding box given as [west, south, east, north]."""
    # `variable` is currently unused; the whole dataset is subset
    lat_bounds = [bbox[1], bbox[3]]  # from south to north
    lon_bounds = [bbox[0], bbox[2]]  # from west to east
    # Drop latitude/longitude coordinates that are entirely NaN, if any
    if dataset['lat'].isnull().any():
        dataset = dataset.dropna(dim='lat', how='all')
    if dataset['lon'].isnull().any():
        dataset = dataset.dropna(dim='lon', how='all')
    # Ensure the lat/lon bounds are within the data's range
    lat_min, lat_max = dataset['lat'].min().item(), dataset['lat'].max().item()
    lon_min, lon_max = dataset['lon'].min().item(), dataset['lon'].max().item()
    if lat_bounds[0] < lat_min or lat_bounds[1] > lat_max or lon_bounds[0] < lon_min or lon_bounds[1] > lon_max:
        raise ValueError("The specified latitude/longitude bounds are outside the range of the dataset.")
    # Subset the data using where and drop cells outside the box
    dataset = dataset.where(
        (dataset['lat'] >= lat_bounds[0]) & (dataset['lat'] <= lat_bounds[1]) &
        (dataset['lon'] >= lon_bounds[0]) & (dataset['lon'] <= lon_bounds[1]),
        drop=True
    )
    return dataset
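subset_region is not used below (the analysis clips to a country shape instead), but a usage sketch would look like this, assuming a dataset loaded with get_spei_dataset; the bounding box values are purely illustrative:

bbox = [32.0, -5.0, 52.0, 15.0]  # [west, south, east, north], illustrative values
spei_box = subset_region(spei_data, variable='SPEI12', bbox=bbox)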
def get_spei_significance_dataset(variable='SPEI1', year=2020):
    data_root_folder = '/data1/drought_dataset/spei/'
    quality_paths = []
    for month in range(1, 13):
        month_str = f'{month:02d}'
        quality_paths.append(f'{data_root_folder}{variable.lower()}/parameter/{variable}_significance_global_era5_moda_{year}{month_str}_ref1991to2020.nc')
    return xr.open_mfdataset(quality_paths, concat_dim="time", combine='nested', parallel=False)

def get_spi_significance_dataset(variable='SPI1', year=2020):
    data_root_folder = '/data1/drought_dataset/spi/'
    quality_paths = []
    for month in range(1, 13):
        month_str = f'{month:02d}'
        quality_paths.append(f'{data_root_folder}{variable.lower()}/parameter/{variable}_significance_global_era5_moda_{year}{month_str}_ref1991to2020.nc')
    return xr.open_mfdataset(quality_paths, concat_dim="time", combine='nested', parallel=False)
Load dataset#
# Load dataset
spei_data = get_spei_dataset(acc_period=12, years=list(range(1940, 2025)))
spei12_masked = mask_invalid_values(spei_data, variable='SPEI12')
Filter the dataset to a specific country#
# Clip the dataset to the selected country's shape
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
world = world.to_crs(epsg=4326)
country_shape = world[world['name'] == 'S. Sudan']
spei12_masked = spei12_masked.rio.write_crs("EPSG:4326", inplace=True)
spei_data_country = spei12_masked.rio.clip(country_shape.geometry, world.crs, drop=True)
spei = spei_data_country['SPEI12']
Plot animated SPEI dataset#
spei.hvplot(
    clim=(-8, 8),
    groupby="time",
    widget_type="scrubber",
    widget_location="bottom",
    projection=ccrs.PlateCarree(),
    coastline='10m',
    cmap='BrBG',
    features=['borders']
)
Set up the drought severity classification function and classes#
Analyse each month and find the area coverage of each condition; a month is classified by the worst condition that covers at least a threshold percentage of the area.#
The classes are as below:
‘Extremely dry’: \(SPEI < -2\)
‘Severely dry’: \(-2 \leq SPEI < -1.5\)
‘Moderately dry’: \(-1.5 \leq SPEI < -1\)
‘Mildly dry’: \(-1 \leq SPEI < 0\)
‘Mildly wet’: \(0 \leq SPEI < 1\)
‘Moderately wet’: \(1 \leq SPEI < 1.5\)
‘Severely wet’: \(1.5 \leq SPEI \leq 2\)
‘Extremely wet’: \(SPEI > 2\)
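As a compact cross-check of these thresholds (a sketch, not the classification code used below), np.digitize maps a single SPEI value to one of the eight classes:

import numpy as np

bins = [-2, -1.5, -1, 0, 1, 1.5, 2]  # class boundaries from the list above
labels = ['Extremely dry', 'Severely dry', 'Moderately dry', 'Mildly dry',
          'Mildly wet', 'Moderately wet', 'Severely wet', 'Extremely wet']
print(labels[np.digitize(-1.7, bins)])  # -> 'Severely dry'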
def classify_drought_severity(spei, classes, conditions, threshold_percentage=50):
    """
    Classifies drought severity based on SPEI values and the percentage of grid points in each class.

    Parameters:
    - spei: An xarray DataArray of SPEI values (dimensions: time, lat, lon).
    - classes: A list of class names (e.g., ['Extremely dry', 'Severely dry', ...]).
    - conditions: A list of Boolean conditions on the grid, one per class.
    - threshold_percentage: Minimum percentage of grid points required to classify a time step into a class.

    Returns:
    - result_df: A pandas DataFrame with counts and percentages of grid points in each class for each time step,
      including a 'Final Classification' column based on the percentage threshold.
    """
    # Calculate the total number of grid points (excluding NaN values)
    total_grid_points = spei.notnull().sum(dim=['lat', 'lon'])
    # Count the number of grid points meeting each condition at each time step
    counts = [condition.sum(dim=['lat', 'lon']) for condition in conditions]
    # Combine counts along a new dimension called 'class'
    counts_concat = xr.concat(counts, dim=pd.Index(classes, name="class"))
    # Convert to DataFrame
    counts_df = counts_concat.to_dataframe(name='count').reset_index()
    # Pivot the DataFrame to have classes as columns
    result_df = counts_df.pivot(index='time', columns='class', values='count').fillna(0)
    # Add total grid points to the result DataFrame
    result_df['Total Grid Points'] = total_grid_points.values
    # Calculate the percentage of grid points for each class
    for class_name in classes:
        result_df[f'{class_name} Percentage'] = (result_df[class_name] / result_df['Total Grid Points']) * 100

    # Determine the final classification for each time step based on the percentage threshold.
    # Classes are checked from driest to wettest, so the worst qualifying class wins.
    def classify_row(row):
        for class_name in classes:
            if row[f'{class_name} Percentage'] >= threshold_percentage:
                return class_name
        return 'No Data'  # If no class meets the threshold

    result_df['Final Classification'] = result_df.apply(classify_row, axis=1)
    return result_df
# Define the conditions and corresponding classes (non-overlapping bins)
conditions = [
    spei < -2,                     # 'Extremely dry'
    (spei >= -2) & (spei < -1.5),  # 'Severely dry'
    (spei >= -1.5) & (spei < -1),  # 'Moderately dry'
    (spei >= -1) & (spei < 0),     # 'Mildly dry'
    (spei >= 0) & (spei < 1),      # 'Mildly wet'
    (spei >= 1) & (spei < 1.5),    # 'Moderately wet'
    (spei >= 1.5) & (spei <= 2),   # 'Severely wet'
    spei > 2                       # 'Extremely wet'
]
classes = ['Extremely dry',
           'Severely dry',
           'Moderately dry',
           'Mildly dry',
           'Mildly wet',
           'Moderately wet',
           'Severely wet',
           'Extremely wet']
Classify months in SPEI#
# Get the result DataFrame
result_df = classify_drought_severity(spei, classes, conditions, threshold_percentage=20)
result_df = result_df.reset_index()
# Output the result
result_df
|  | time | Extremely dry | Extremely wet | Mildly dry | Mildly wet | Moderately dry | Moderately wet | Severely dry | Severely wet | Total Grid Points | Extremely dry Percentage | Severely dry Percentage | Moderately dry Percentage | Mildly dry Percentage | Mildly wet Percentage | Moderately wet Percentage | Severely wet Percentage | Extremely wet Percentage | Final Classification |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1940-01-01 06:00:00 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | No Data |
| 1 | 1940-02-01 06:00:00 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | No Data |
| 2 | 1940-03-01 06:00:00 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | No Data |
| 3 | 1940-04-01 06:00:00 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | No Data |
| 4 | 1940-05-01 06:00:00 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | No Data |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1004 | 2023-09-01 06:00:00 | 756 | 0 | 5 | 0 | 13 | 0 | 44 | 0 | 818 | 92.420538 | 5.378973 | 1.589242 | 0.611247 | 0.000000 | 0.0 | 0.0 | 0.0 | Extremely dry |
| 1005 | 2023-10-01 06:00:00 | 723 | 0 | 14 | 0 | 27 | 0 | 54 | 0 | 818 | 88.386308 | 6.601467 | 3.300733 | 1.711491 | 0.000000 | 0.0 | 0.0 | 0.0 | Extremely dry |
| 1006 | 2023-11-01 06:00:00 | 644 | 0 | 33 | 2 | 41 | 0 | 98 | 0 | 818 | 78.728606 | 11.980440 | 5.012225 | 4.034230 | 0.244499 | 0.0 | 0.0 | 0.0 | Extremely dry |
| 1007 | 2023-12-01 06:00:00 | 647 | 0 | 34 | 1 | 38 | 0 | 98 | 0 | 818 | 79.095355 | 11.980440 | 4.645477 | 4.156479 | 0.122249 | 0.0 | 0.0 | 0.0 | Extremely dry |
| 1008 | 2024-01-01 06:00:00 | 652 | 0 | 40 | 1 | 39 | 0 | 86 | 0 | 818 | 79.706601 | 10.513447 | 4.767726 | 4.889976 | 0.122249 | 0.0 | 0.0 | 0.0 | Extremely dry |

1009 rows × 19 columns
Generate a barplot of the dataset to visualize drought events#
# Map the classifications to colors
result_df['Color'] = result_df['Final Classification'].map(cmap)
# Create the plot
fig = go.Figure()
legend_order = [
    'Extremely dry', 'Severely dry', 'Moderately dry', 'Mildly dry',
    'Mildly wet', 'Moderately wet', 'Severely wet', 'Extremely wet',
    'No Data'
]
# Add one bar trace per class so that each class gets its own legend entry
for lbl in legend_order:
    subset = result_df.loc[result_df['Final Classification'] == lbl]
    fig.add_trace(go.Bar(
        x=subset['time'],
        y=[1] * len(subset),  # one unit-height bar per classified month
        name=lbl,
        marker=dict(color=subset['Color'], line=dict(width=0)),
        width=60 * 24 * 60 * 60 * 1000,  # bar width in milliseconds (60 days)
        orientation='v',  # vertical bars
    ))
x_min = result_df['time'].min()
x_max = result_df['time'].max()
# Update x-axis and y-axis
fig.update_xaxes(
    title_text='Time (Months)',
    tickformat='%Y',  # Format x-axis labels to show only the year
    tickangle=90,  # Rotate x-axis labels
    rangeslider_visible=False,  # Hide the range slider
    type='date',  # Ensure the x-axis is treated as dates
    range=[x_min, x_max]
)
fig.update_yaxes(
    visible=False  # Hide y-axis
)
# Add legend
fig.update_layout(
    title='Drought Classification Over Time',
    legend_title='Drought Classification',
    legend=dict(
        x=1.05,  # Position the legend to the right of the plot
        y=1,
        orientation='v',
        traceorder='normal'  # Keep legend entries in the order the traces were added
    ),
    margin=dict(l=50, r=200, t=50, b=50),
    paper_bgcolor='white',
    plot_bgcolor='white',
    font=dict(
        color='#2a3f5f',
        family='sans-serif'
    ),
)
# Show the plot
fig.show()
Set up a function to detect continuous periods of a condition#
A drought event is defined as a continuous period of at least mildly dry conditions. Dry periods separated by fewer than a threshold number of non-drought months (min_sep, default 1) are not counted as separate events.
def detect_continuous_periods_with_dates(df, binary_col, date_col, min_sep=1):
    """
    Detects continuous periods of 1s in a binary vector within a DataFrame and returns a new DataFrame
    with the start date, end date, and duration of each period.

    Parameters:
    - df: Input DataFrame containing the binary vector and dates.
    - binary_col: Column name for the binary vector (0s and 1s).
    - date_col: Column name for the corresponding dates.
    - min_sep: Minimum number of continuous 0s required to separate periods of 1s.

    Returns:
    - periods_df: A DataFrame with 'Start Date', 'End Date', and 'Duration' columns.
    """
    # Ensure binary_col is binary (0s and 1s)
    assert df[binary_col].isin([0, 1]).all(), "The binary column must contain only 0s and 1s."
    # Detect transitions in the binary column
    transitions = df[binary_col].diff().fillna(0)
    # Find where the vector changes from 0 to 1 (start of 1s) and 1 to 0 (end of 1s)
    start_ones = transitions == 1
    end_ones = transitions == -1
    # Get the indices of these transitions
    start_indices = start_ones[start_ones].index
    end_indices = end_ones[end_ones].index
    # If the series starts with 1s, add a start at the beginning
    if df[binary_col].iloc[0] == 1:
        start_indices = pd.Index([df.index[0]]).append(start_indices)
    # If the series ends with 1s, add an end at the end
    if df[binary_col].iloc[-1] == 1:
        end_indices = end_indices.append(pd.Index([df.index[-1]]))
    # Ensure indices are aligned
    assert len(start_indices) == len(end_indices), "Mismatched start and end periods."
    # Filter out periods that start too soon after the previous period ended
    valid_periods = []
    last_end = -min_sep - 1  # Initialize last_end far enough back
    for start, end in zip(start_indices, end_indices):
        if start - last_end >= min_sep:
            valid_periods.append((start, end))
            last_end = end
    # Create a new DataFrame for the detected periods
    periods = []
    for start, end in valid_periods:
        start_date = df.loc[start, date_col]
        end_date = df.loc[end, date_col]
        duration = (end_date.year - start_date.year) * 12 + end_date.month - start_date.month + 1  # Duration in months
        periods.append({'Start Date': start_date, 'End Date': end_date, 'Duration': duration})
    periods_df = pd.DataFrame(periods)
    return periods_df
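A quick sanity check on a toy series (illustrative values, not part of the analysis) shows how the function turns a binary timeline into events:

# Toy monthly timeline: two drought runs separated by one non-drought month
toy = pd.DataFrame({
    'time': pd.date_range('2000-01-01', periods=8, freq='MS'),
    'drought': [0, 1, 1, 0, 1, 1, 1, 0],
})
detect_continuous_periods_with_dates(toy, binary_col='drought', date_col='time', min_sep=1)
# Two events are returned; note that for interior runs the End Date is the first
# non-drought month after the run, matching the transition-based bookkeeping above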
Convert the timeline to a binary vector#
Every dry classification is marked as drought (1) and everything else as no drought (0). With min_sep = 1, a single non-drought month is enough to separate two events.
min_sep = 1  # Minimum separation of one zero to consider periods distinct
result_df['class'] = np.where((result_df['Final Classification'] == 'Extremely dry') |
                              (result_df['Final Classification'] == 'Severely dry') |
                              (result_df['Final Classification'] == 'Moderately dry') |
                              (result_df['Final Classification'] == 'Mildly dry'), 1, 0)
Find the continuous periods and calculate their duration#
periods_df = detect_continuous_periods_with_dates(result_df, binary_col='class', date_col='time', min_sep=min_sep)
periods_df
|  | Start Date | End Date | Duration |
|---|---|---|---|
| 0 | 1941-07-01 06:00:00 | 1942-05-01 06:00:00 | 11 |
| 1 | 1942-11-01 06:00:00 | 1943-06-01 06:00:00 | 8 |
| 2 | 1944-11-01 06:00:00 | 1945-10-01 06:00:00 | 12 |
| 3 | 1946-02-01 06:00:00 | 1946-09-01 06:00:00 | 8 |
| 4 | 1965-10-01 06:00:00 | 1966-09-01 06:00:00 | 12 |
| 5 | 1966-10-01 06:00:00 | 1967-04-01 06:00:00 | 7 |
| 6 | 1988-06-01 06:00:00 | 1988-07-01 06:00:00 | 2 |
| 7 | 1991-03-01 06:00:00 | 1991-04-01 06:00:00 | 2 |
| 8 | 1994-03-01 06:00:00 | 1994-06-01 06:00:00 | 4 |
| 9 | 1997-09-01 06:00:00 | 1997-12-01 06:00:00 | 4 |
| 10 | 1998-04-01 06:00:00 | 1998-08-01 06:00:00 | 5 |
| 11 | 2000-10-01 06:00:00 | 2001-03-01 06:00:00 | 6 |
| 12 | 2001-05-01 06:00:00 | 2001-06-01 06:00:00 | 2 |
| 13 | 2001-07-01 06:00:00 | 2001-08-01 06:00:00 | 2 |
| 14 | 2002-07-01 06:00:00 | 2003-09-01 06:00:00 | 15 |
| 15 | 2003-10-01 06:00:00 | 2008-05-01 06:00:00 | 56 |
| 16 | 2008-06-01 06:00:00 | 2024-01-01 06:00:00 | 188 |
Plot all the event durations and find the 75th percentile to identify drought events with an anomalous duration#
Events with a duration above the 75th percentile can be characterized as anomalous. For S. Sudan one such drought event lasted from July 2002 to September 2003.
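As a small check consistent with this definition (not a cell from the original notebook), the anomalous events can be listed directly from periods_df:

threshold = periods_df['Duration'].quantile(0.75)  # 75th-percentile duration baseline
periods_df[periods_df['Duration'] > threshold]     # events with anomalous durations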
def plot_duration_bar_plot(data):
    p90_duration = np.percentile(data.Duration, 90)
    p10_duration = np.percentile(data.Duration, 10)
    median_duration = data.Duration.median()
    # Generate x-axis labels based on the dates
    x_labels = [f"{start.strftime('%Y-%m')} - {end.strftime('%Y-%m')}" for start, end in zip(data['Start Date'], data['End Date'])]
    # Create a numerical x-axis for the plot
    x_numeric = list(range(len(x_labels)))
    # Create bars for each event
    bar = go.Bar(
        x=x_numeric,
        y=data['Duration'],
        marker=dict(color='skyblue', line=dict(color='black', width=1)),
        name='Event period',
    )
    # Define the x-axis range for the lines
    line_x_values = [x_numeric[0] - 1, x_numeric[-1] + 1]  # Extend beyond the first and last data points
    # Create lines for the percentiles and the median
    percentile_90_line = go.Scatter(
        x=line_x_values,
        y=[p90_duration, p90_duration],
        mode='lines',
        line=dict(color='red', dash='dash'),
        name=f'90th percentile: {p90_duration:.2f} months'
    )
    percentile_10_line = go.Scatter(
        x=line_x_values,
        y=[p10_duration, p10_duration],
        mode='lines',
        line=dict(color='green', dash='dash'),
        name=f'10th percentile: {p10_duration:.2f} months'
    )
    median_line = go.Scatter(
        x=line_x_values,
        y=[median_duration, median_duration],
        mode='lines',
        line=dict(color='blue', dash='dash'),
        name=f'Median: {median_duration:.2f} months'
    )
    # Create the layout
    layout = go.Layout(
        title='Duration of drought events',
        xaxis=dict(
            title='Events',
            tickangle=-45,
            tickmode='array',
            tickvals=x_numeric,
            ticktext=x_labels,
            range=[x_numeric[0] - 1, x_numeric[-1] + 1],  # Extend the x-axis range
        ),
        yaxis=dict(title='Duration (Months)'),
        barmode='group',
        legend=dict(x=1, y=0.5, orientation='v'),
        margin=dict(l=50, r=50, t=50, b=100),
        paper_bgcolor='white',
        plot_bgcolor='white',
        font=dict(
            color='#2a3f5f',
            family='sans-serif'
        ),
    )
    # Create the figure and add the traces
    fig = go.Figure(data=[bar, percentile_90_line, percentile_10_line, median_line], layout=layout)
    # Show the plot
    fig.show()

plot_duration_bar_plot(periods_df)
Calculate the area percentage of each class for each month and aggregate over each event#
def calculate_area_percentage(monthly_data, periods):
    columns_to_use = ['Extremely dry',
                      'Severely dry',
                      'Moderately dry',
                      'Mildly dry',
                      'Mildly wet',
                      'Moderately wet',
                      'Severely wet',
                      'Extremely wet']
    new_columns = [f'{col} %' for col in columns_to_use]
    rows = []
    for _, row in periods.iterrows():
        start_date = row['Start Date']
        end_date = row['End Date']
        df = monthly_data.loc[(monthly_data.time >= start_date) & (monthly_data.time <= end_date)].copy()
        total = df[columns_to_use].sum(axis=1)
        # Calculate the percentage for each specified column
        df_percentage = df[columns_to_use].div(total, axis=0) * 100
        df_percentage.rename(columns=dict(zip(columns_to_use, new_columns)), inplace=True)
        # Add the percentage columns back to the dataframe
        df.loc[:, new_columns] = df_percentage
        # Average the monthly percentages over the event
        rows.append(df[new_columns].mean(axis=0))
    new_df = pd.concat(rows, axis=1).T.reset_index(drop=True)
    new_df['Start Date'] = periods['Start Date']
    new_df['End Date'] = periods['End Date']
    return new_df
percentages = calculate_area_percentage(result_df, periods_df)
percentages
|  | Extremely dry % | Severely dry % | Moderately dry % | Mildly dry % | Mildly wet % | Moderately wet % | Severely wet % | Extremely wet % | Start Date | End Date |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.522338 | 4.156479 | 11.658146 | 53.723050 | 29.228717 | 0.700156 | 0.011114 | 0.000000 | 1941-07-01 06:00:00 | 1942-05-01 06:00:00 |
| 1 | 2.444988 | 5.684597 | 8.175428 | 27.735330 | 48.960880 | 4.859413 | 1.589242 | 0.550122 | 1942-11-01 06:00:00 | 1943-06-01 06:00:00 |
| 2 | 1.629992 | 4.024042 | 7.620212 | 45.069275 | 38.457620 | 2.485738 | 0.550122 | 0.162999 | 1944-11-01 06:00:00 | 1945-10-01 06:00:00 |
| 3 | 0.702934 | 1.191932 | 4.003667 | 28.896699 | 61.048289 | 3.438264 | 0.473716 | 0.244499 | 1946-02-01 06:00:00 | 1946-09-01 06:00:00 |
| 4 | 5.063162 | 1.996740 | 3.249796 | 28.698044 | 44.947025 | 10.431948 | 3.942543 | 1.670742 | 1965-10-01 06:00:00 | 1966-09-01 06:00:00 |
| 5 | 3.021306 | 2.095704 | 4.016766 | 30.143206 | 40.866224 | 12.155082 | 5.780650 | 1.921062 | 1966-10-01 06:00:00 | 1967-04-01 06:00:00 |
| 6 | 0.061125 | 0.000000 | 0.000000 | 12.958435 | 75.611247 | 8.985330 | 2.322738 | 0.061125 | 1988-06-01 06:00:00 | 1988-07-01 06:00:00 |
| 7 | 0.000000 | 0.000000 | 0.122249 | 13.630807 | 57.762836 | 23.349633 | 5.012225 | 0.122249 | 1991-03-01 06:00:00 | 1991-04-01 06:00:00 |
| 8 | 1.344743 | 2.139364 | 4.828851 | 29.217604 | 61.308068 | 1.161369 | 0.000000 | 0.000000 | 1994-03-01 06:00:00 | 1994-06-01 06:00:00 |
| 9 | 0.244499 | 0.275061 | 2.414425 | 20.690709 | 54.125917 | 21.943765 | 0.305623 | 0.000000 | 1997-09-01 06:00:00 | 1997-12-01 06:00:00 |
| 10 | 0.097800 | 0.000000 | 0.855746 | 30.268949 | 63.960880 | 4.523227 | 0.293399 | 0.000000 | 1998-04-01 06:00:00 | 1998-08-01 06:00:00 |
| 11 | 0.285249 | 0.753871 | 1.915240 | 26.385493 | 70.252649 | 0.407498 | 0.000000 | 0.000000 | 2000-10-01 06:00:00 | 2001-03-01 06:00:00 |
| 12 | 0.061125 | 0.000000 | 0.061125 | 18.459658 | 81.356968 | 0.061125 | 0.000000 | 0.000000 | 2001-05-01 06:00:00 | 2001-06-01 06:00:00 |
| 13 | 0.122249 | 0.000000 | 0.061125 | 20.048900 | 78.606357 | 1.161369 | 0.000000 | 0.000000 | 2001-07-01 06:00:00 | 2001-08-01 06:00:00 |
| 14 | 0.089650 | 4.947025 | 13.504482 | 50.472698 | 30.839446 | 0.146699 | 0.000000 | 0.000000 | 2002-07-01 06:00:00 | 2003-09-01 06:00:00 |
| 15 | 0.074223 | 0.093870 | 2.984195 | 52.023664 | 42.630108 | 1.547765 | 0.628711 | 0.017464 | 2003-10-01 06:00:00 | 2008-05-01 06:00:00 |
| 16 | 9.442465 | 17.419237 | 20.608126 | 37.739947 | 12.841388 | 1.170473 | 0.498751 | 0.279613 | 2008-06-01 06:00:00 | 2024-01-01 06:00:00 |
percentages['Dry'] = percentages.loc[:, ['Extremely dry %', 'Severely dry %', 'Moderately dry %', 'Mildly dry %']].sum(axis=1)
percentages
|  | Extremely dry % | Severely dry % | Moderately dry % | Mildly dry % | Mildly wet % | Moderately wet % | Severely wet % | Extremely wet % | Start Date | End Date | Dry |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.522338 | 4.156479 | 11.658146 | 53.723050 | 29.228717 | 0.700156 | 0.011114 | 0.000000 | 1941-07-01 06:00:00 | 1942-05-01 06:00:00 | 70.060013 |
| 1 | 2.444988 | 5.684597 | 8.175428 | 27.735330 | 48.960880 | 4.859413 | 1.589242 | 0.550122 | 1942-11-01 06:00:00 | 1943-06-01 06:00:00 | 44.040342 |
| 2 | 1.629992 | 4.024042 | 7.620212 | 45.069275 | 38.457620 | 2.485738 | 0.550122 | 0.162999 | 1944-11-01 06:00:00 | 1945-10-01 06:00:00 | 58.343521 |
| 3 | 0.702934 | 1.191932 | 4.003667 | 28.896699 | 61.048289 | 3.438264 | 0.473716 | 0.244499 | 1946-02-01 06:00:00 | 1946-09-01 06:00:00 | 34.795232 |
| 4 | 5.063162 | 1.996740 | 3.249796 | 28.698044 | 44.947025 | 10.431948 | 3.942543 | 1.670742 | 1965-10-01 06:00:00 | 1966-09-01 06:00:00 | 39.007742 |
| 5 | 3.021306 | 2.095704 | 4.016766 | 30.143206 | 40.866224 | 12.155082 | 5.780650 | 1.921062 | 1966-10-01 06:00:00 | 1967-04-01 06:00:00 | 39.276982 |
| 6 | 0.061125 | 0.000000 | 0.000000 | 12.958435 | 75.611247 | 8.985330 | 2.322738 | 0.061125 | 1988-06-01 06:00:00 | 1988-07-01 06:00:00 | 13.019560 |
| 7 | 0.000000 | 0.000000 | 0.122249 | 13.630807 | 57.762836 | 23.349633 | 5.012225 | 0.122249 | 1991-03-01 06:00:00 | 1991-04-01 06:00:00 | 13.753056 |
| 8 | 1.344743 | 2.139364 | 4.828851 | 29.217604 | 61.308068 | 1.161369 | 0.000000 | 0.000000 | 1994-03-01 06:00:00 | 1994-06-01 06:00:00 | 37.530562 |
| 9 | 0.244499 | 0.275061 | 2.414425 | 20.690709 | 54.125917 | 21.943765 | 0.305623 | 0.000000 | 1997-09-01 06:00:00 | 1997-12-01 06:00:00 | 23.624694 |
| 10 | 0.097800 | 0.000000 | 0.855746 | 30.268949 | 63.960880 | 4.523227 | 0.293399 | 0.000000 | 1998-04-01 06:00:00 | 1998-08-01 06:00:00 | 31.222494 |
| 11 | 0.285249 | 0.753871 | 1.915240 | 26.385493 | 70.252649 | 0.407498 | 0.000000 | 0.000000 | 2000-10-01 06:00:00 | 2001-03-01 06:00:00 | 29.339853 |
| 12 | 0.061125 | 0.000000 | 0.061125 | 18.459658 | 81.356968 | 0.061125 | 0.000000 | 0.000000 | 2001-05-01 06:00:00 | 2001-06-01 06:00:00 | 18.581907 |
| 13 | 0.122249 | 0.000000 | 0.061125 | 20.048900 | 78.606357 | 1.161369 | 0.000000 | 0.000000 | 2001-07-01 06:00:00 | 2001-08-01 06:00:00 | 20.232274 |
| 14 | 0.089650 | 4.947025 | 13.504482 | 50.472698 | 30.839446 | 0.146699 | 0.000000 | 0.000000 | 2002-07-01 06:00:00 | 2003-09-01 06:00:00 | 69.013855 |
| 15 | 0.074223 | 0.093870 | 2.984195 | 52.023664 | 42.630108 | 1.547765 | 0.628711 | 0.017464 | 2003-10-01 06:00:00 | 2008-05-01 06:00:00 | 55.175952 |
| 16 | 9.442465 | 17.419237 | 20.608126 | 37.739947 | 12.841388 | 1.170473 | 0.498751 | 0.279613 | 2008-06-01 06:00:00 | 2024-01-01 06:00:00 | 85.209775 |
def plot_area_bar_plot(data, columns_to_sum=['Moderately dry %',
                                             'Mildly dry %',
                                             'Mildly wet %',
                                             'Moderately wet %',
                                             'Severely wet %',
                                             'Extremely wet %']):
    columns = [i for i in data.columns if '%' in i and i not in columns_to_sum]
    fig = go.Figure()
    x_axis_labels = [f"{start.strftime('%Y-%m')} - {end.strftime('%Y-%m')}" for start, end in zip(data['Start Date'], data['End Date'])]
    # Add a single 'Normal' bar for all the classes that are summed together
    if columns_to_sum:
        fig.add_trace(go.Bar(
            x=x_axis_labels,
            y=data[columns_to_sum].sum(axis=1),
            name='Normal',
            marker=dict(color=cmap['Severely wet'], line=dict(width=0))
        ))
    # Add a bar per remaining category, from wettest to driest
    for category in columns[::-1]:
        fig.add_trace(go.Bar(
            x=x_axis_labels,
            y=data[category],
            name=category[:-2],
            marker=dict(color=cmap[category[:-2]])
        ))
    # Update the layout for stacked bars
    fig.update_layout(
        barmode='stack',  # Stack the bars
        title='Area of each type of drought',
        xaxis=dict(title='Events',
                   tickangle=-45,  # Rotate the x-axis labels by -45 degrees
                   tickmode='array',
                   tickvals=x_axis_labels,
                   ticktext=x_axis_labels,),
        yaxis=dict(title='Percentage'),
        legend=dict(orientation='v', x=1, y=0.5),
        margin=dict(l=50, r=50, t=50, b=100),
        paper_bgcolor='white',
        plot_bgcolor='white',
        font=dict(
            color='#2a3f5f',
            family='sans-serif'
        ),
        bargap=0
    )
    # Show the plot
    fig.show()
plot_area_bar_plot(percentages, columns_to_sum=[])
plot_area_bar_plot(percentages)
Plot drought durations for each grid point on a map for a selected drought event#
# Select a drought event (here index 4: October 1965 to September 1966)
event_1 = periods_df.iloc[4]
event_data = spei.loc[event_1['Start Date'].isoformat():event_1['End Date'].isoformat()]
# Count, per grid point, the months with SPEI12 below -1.5 (severely or extremely dry),
# keeping NaN where there is no data
nan_mask = np.isnan(event_data)
condition = xr.where(nan_mask, np.nan, event_data < -1.5)
occurrences_xr = condition.sum(dim='time')
occurrences_xr = occurrences_xr.rio.write_crs("EPSG:4326", inplace=True)
occurrences_xr = occurrences_xr.rio.set_spatial_dims(x_dim='lon', y_dim='lat')
occurrences_xr = occurrences_xr.rio.clip(country_shape.geometry, world.crs, drop=True)
# occurrences_xr is 2-D (lat/lon) after summing over time, so a static plot is enough
occurrences_xr.hvplot(
    projection=ccrs.PlateCarree(),
    coastline='10m',
    cmap='BrBG_r',
    features=['borders']
)
df = occurrences_xr.to_dataframe().reset_index()
df = df.dropna(subset=['lat', 'lon', 'SPEI12'])
# Extract the latitude, longitude, and occurrence data
lat = df['lat'].values
lon = df['lon'].values
occurrences = df['SPEI12'].values
# Create the base map with country borders
fig = go.Figure(go.Scattergeo(
    locationmode='country names',
    lon=lon,
    lat=lat,
    text=occurrences,
    marker=dict(
        size=7,
        color=occurrences,  # Use occurrences for color
        colorscale='BrBG_r',
        showscale=True,
        colorbar=dict(title="Occurrences"),
        line=dict(width=1, color='black')
    )
))
# Update the layout for better visibility
fig.update_layout(
    geo=dict(
        scope='world',
        projection_type='natural earth',
        showcoastlines=True,
        showcountries=True,
        countrycolor="Black",
        coastlinecolor="Black",
        # Define the region of interest by setting lat/lon bounds
        lonaxis=dict(range=[lon.min() - 0.5, lon.max() + 0.5]),  # Longitude bounds
        lataxis=dict(range=[lat.min() - 0.5, lat.max() + 0.5]),  # Latitude bounds
        # Center the map on the area of interest
        center=dict(lon=lon.mean(), lat=lat.mean()),
    ),
    title="Monthly occurrences of SPEI12 < -1.5",
)
# Show the plot
fig.show()
Group the durations of each grid point into bins:#
Up to 1 month
Up to 3 months
Up to 6 months
More than 6 months
from plotly.subplots import make_subplots

conditions = [
    (lambda x: x < -0.5, 'Mildly dry'),
    (lambda x: x < -1, 'Moderately dry'),
    (lambda x: x < -1.5, 'Severely dry'),
    (lambda x: x < -2, 'Extremely dry')
]

def categorize_occurrences(x):
    if np.isnan(x):
        return 'No data'
    elif x <= 1:
        return 'Up to 1 month'
    elif x <= 3:
        return 'Up to 3 months'
    elif x <= 6:
        return 'Up to 6 months'
    else:
        return 'More than 6 months'
def add_traces_to_subplot(fig, df):
    df = df.dropna(subset=['lat', 'lon', 'category'])
    lat = df['lat'].values
    lon = df['lon'].values
    category = df['category'].values
    color_map = {
        'Up to 1 month': 'blue',
        'Up to 3 months': 'green',
        'Up to 6 months': 'orange',
        'More than 6 months': 'red'
    }
    # Plot each category separately to apply different colors
    for cat in color_map:
        mask = (df['category'] == cat).values
        fig.add_trace(go.Scattergeo(
            lon=lon[mask],
            lat=lat[mask],
            text=category[mask],
            mode='markers',
            marker=dict(
                size=10,
                color=color_map[cat],
                opacity=1,
                line=dict(width=1, color='black')
            ),
            name=cat,  # Label for the legend
            showlegend=True  # Control legend visibility
        ))
for condition, description in conditions:
    # Apply the SPEI condition and count qualifying months per grid point
    data = condition(event_data).sum(dim='time')
    # Mask the area outside of the AOI shape
    nan_mask = np.isnan(data)
    result = xr.where(nan_mask, np.nan, data)
    result = result.rio.write_crs("EPSG:4326", inplace=True)
    result = result.rio.set_spatial_dims(x_dim='lon', y_dim='lat')
    result = result.rio.clip(country_shape.geometry, world.crs, drop=True)
    fig = go.Figure()
    # Convert the xarray to a DataFrame and categorize each grid point
    df = result.to_dataframe().reset_index()
    df['category'] = df['SPEI12'].apply(categorize_occurrences)
    # Plot the data
    add_traces_to_subplot(fig, df)
    # Update the layout: set the geographic scope and zoom in on the AOI
    fig.update_layout(
        geo=dict(
            scope='world',
            projection_type='natural earth',
            showcoastlines=True,
            showcountries=True,
            countrycolor="Black",
            coastlinecolor="Black",
            # Define the region of interest by setting lat/lon bounds
            lonaxis=dict(range=[df['lon'].min() - 0.5, df['lon'].max() + 0.5]),  # Longitude bounds
            lataxis=dict(range=[df['lat'].min() - 0.5, df['lat'].max() + 0.5]),  # Latitude bounds
            # Center the map on the AOI
            center=dict(lon=df['lon'].mean(), lat=df['lat'].mean()),
        ),
        title=f"Monthly Occurrences of {description}",
        margin=dict(l=1, r=1, t=80, b=5),
        legend=dict(
            x=1.05,
            y=0.5,
            traceorder='normal',
            orientation='v'
        )
    )
    # Show the plot
    fig.show()