In [5]:
import csv
import numpy as np
import pandas as pd


def td_dat_file_generation(time_series, cluster_matrix, nbr_td, out_path):
    """Generate the .dat file for the typical days"""
    eud_params = {'Electricity (%_elec)': 'param electricity_time_series :',
                  'Space Heating (%_sh)': 'param heating_time_series :',
                  'Passanger mobility (%_pass)': 'param mob_pass_time_series :',
                  'Freight mobility (%_freight)': 'param mob_freight_time_series :'}
    # resource time series with only one technology linked to them
    res_params = {'PV': 'PV', 'Wind_onshore': 'WIND_ONSHORE', 'Wind_offshore': 'WIND_OFFSHORE',
                  'Hydro_river': 'HYDRO_RIVER',
                  }
    # resource time series with several technologies linked to them
    res_mult_params = {'Solar': ['DHN_SOLAR', 'DEC_SOLAR']}
    # Redefine the output file (note: this overrides the out_path argument) #
    out_path = 'tutorial_output/' + 'ESTD_' + str(nbr_td) + 'TD.dat'
    # READING OUTPUT OF STEP 1 #
    td_data = generate_t_h_td(cluster_matrix.reset_index(drop=True).rename(columns={'TypicalDay': 'TD_of_days'}), nbr_td)
    # config['td_data'] = td_data
    # COMPUTING THE NUMBER OF DAYS REPRESENTED BY EACH TD #
    sorted_td = td_data['td_count'].copy()
    # BUILDING THE T_H_TD MATRIX #
    t_h_td = td_data['t_h_td'].copy()
    # giving the right syntax for AMPL
    t_h_td['par_g'] = '('
    t_h_td['par_d'] = ')'
    t_h_td['comma1'] = ','
    t_h_td['comma2'] = ','
    # giving the right order to the columns
    t_h_td = t_h_td[['par_g', 'H_of_Y', 'comma1', 'H_of_D', 'comma2', 'TD_number', 'par_d']]
    # COMPUTING THE NORM OVER THE YEAR #
    norm = time_series.sum(axis=0)
    norm.index.rename('Category', inplace=True)
    norm.name = 'Norm'
    # BUILDING THE TD TIME SERIES #
    # creating a df with 2 columns: day of the year | hour of the day
    d_of_h = np.repeat(np.arange(1, 366, 1), 24, axis=0)  # each day of the year repeated 24 times
    h_of_d = np.resize(np.arange(1, 25), 24 * 365)  # hours 1 to 24 repeated 365 times
    day_and_hour = pd.DataFrame(np.vstack((d_of_h, h_of_d)).T, index=np.arange(1, 8761, 1),
                                columns=['D_of_H', 'H_of_D'])
    day_and_hour = day_and_hour.astype('int64')
    time_series = time_series.merge(day_and_hour, left_index=True, right_index=True)
    # selecting the time series of the TDs only
    td_ts = time_series[time_series['D_of_H'].isin(sorted_td['TD_of_days'])]
    # COMPUTING THE NORM_TD OVER THE YEAR FOR THE CORRECTION #
    # computing the sum of the ts over each TD
    agg_td_ts = td_ts.groupby('D_of_H').sum()
    agg_td_ts.reset_index(inplace=True)
    agg_td_ts.drop(columns=['D_of_H', 'H_of_D'], inplace=True)
    # multiplying each TD by the number of days it represents
    for c in agg_td_ts.columns:
        agg_td_ts[c] = agg_td_ts[c] * sorted_td['#days']
    # sum of the new ts over the whole year
    norm_td = agg_td_ts.sum()
    # BUILDING THE DF WITH THE TS OF EACH TD FOR EACH CATEGORY #
    # pivoting td_ts to obtain a (24, nbr_td x nbr_ts) table
    all_td_ts = td_ts.pivot(index='H_of_D', columns='D_of_H')
    # COMPUTING peak_sh_factor #
    max_sh_td = td_ts.loc[:, 'Space Heating (%_sh)'].max()
    max_sh_all = time_series.loc[:, 'Space Heating (%_sh)'].max()
    peak_sh_factor = max_sh_all / max_sh_td
    # PRINTING #
    # printing the description of the file
    # header_file = (Path(__file__).parent / 'headers' / 'header_12td.txt')
    # print_header(header_file=header_file, dat_file=out_path)
    # printing the sets and parameters
    with open(out_path, mode='a', newline='') as td_file:
        td_writer = csv.writer(td_file, delimiter='\t', quotechar=' ', quoting=csv.QUOTE_MINIMAL)
        # # print nbr_tds param
        # td_writer.writerow(['param nbr_tds := ' + str(nbr_td)])
        # td_writer.writerow(['; '])
        # td_writer.writerow([' '])
        # # peak_sh_factor
        # td_writer.writerow(['param peak_sh_factor := ' + str(peak_sh_factor)])
        # td_writer.writerow(['; '])
        # td_writer.writerow([' '])
        # printing the T_H_TD set
        td_writer.writerow(['#SETS [Figure 3] '])
        td_writer.writerow(['set T_H_TD := '])
    t_h_td.to_csv(out_path, sep='\t', header=False, index=False, mode='a', quoting=csv.QUOTE_NONE)
    # printing the interlude
    with open(out_path, mode='a', newline='') as td_file:
        td_writer = csv.writer(td_file, delimiter='\t', quotechar=' ', quoting=csv.QUOTE_MINIMAL)
        td_writer.writerow([';'])
        td_writer.writerow([''])
        td_writer.writerow(['# -----------------------------'])
        td_writer.writerow(['# PARAMETERS DEPENDING ON NUMBER OF TYPICAL DAYS : '])
        td_writer.writerow(['# -----------------------------'])
        td_writer.writerow([''])
    # printing the EUD time series parameters
    for k in eud_params.keys():
        ts = all_td_ts[k]
        ts.columns = np.arange(1, nbr_td + 1)
        ts = ts * norm[k] / norm_td[k]
        ts.fillna(0, inplace=True)
        ts = ampl_syntax(ts, '')
        print_df(eud_params[k], ts, out_path)
        newline(out_path)
    # printing the c_p_t parameter #
    with open(out_path, mode='a', newline='') as td_file:
        td_writer = csv.writer(td_file, delimiter='\t', quotechar=' ', quoting=csv.QUOTE_MINIMAL)
        td_writer.writerow(['param c_p_t:='])
    # printing the c_p_t part where one ts maps to one tech
    for k in res_params.keys():
        ts = all_td_ts[k]
        ts.columns = np.arange(1, nbr_td + 1)
        ts = ts * norm[k] / norm_td[k]
        ts.fillna(0, inplace=True)
        ts = ampl_syntax(ts, '')
        s = '["' + res_params[k] + '",*,*]:'
        ts.to_csv(out_path, sep='\t', mode='a', header=True, index=True, index_label=s, quoting=csv.QUOTE_NONE)
        newline(out_path)
    # printing the c_p_t part where one ts maps to more than one tech
    for k in res_mult_params.keys():
        for j in res_mult_params[k]:
            ts = all_td_ts[k]
            ts.columns = np.arange(1, nbr_td + 1)
            ts = ts * norm[k] / norm_td[k]
            ts.fillna(0, inplace=True)
            ts = ampl_syntax(ts, '')
            s = '["' + j + '",*,*]:'
            ts.to_csv(out_path, sep='\t', mode='a', header=True, index=True, index_label=s, quoting=csv.QUOTE_NONE)
    # closing the c_p_t parameter (AMPL data syntax requires a terminating ';')
    with open(out_path, mode='a', newline='') as td_file:
        td_writer = csv.writer(td_file, delimiter='\t', quotechar=' ', quoting=csv.QUOTE_MINIMAL)
        td_writer.writerow([';'])


def generate_t_h_td(td_of_days, nbr_td):
    """Generate the t_h_td and td_count dataframes.
    t_h_td is a pd.DataFrame containing 4 columns:
    hour of the year (H_of_Y), hour of the day (H_of_D), typical day representing this day (TD_of_days)
    and the number assigned to this typical day (TD_number).
    td_count is a pd.DataFrame containing 2 columns:
    the list of typical days (TD_of_days) and the number of days each one represents (#days).
    """
    # Reading td_of_days (already provided by the caller in this tutorial)
    # td_of_days = pd.read_csv(config['step1_path'] / 'td_of_days.out', names=['TD_of_days'])
    td_of_days['day'] = np.arange(1, 366, 1)  # putting the day of the year alongside its TD
    # COMPUTING THE NUMBER OF DAYS REPRESENTED BY EACH TD AND ASSIGNING A TD NUMBER TO EACH REPRESENTATIVE DAY
    td_count = td_of_days.groupby('TD_of_days').count()
    td_count = td_count.reset_index().rename(columns={'index': 'TD_of_days', 'day': '#days'})
    td_count['TD_number'] = np.arange(1, nbr_td + 1)
    # BUILDING THE T_H_TD MATRIX
    t_h_td = pd.DataFrame(np.repeat(td_of_days['TD_of_days'].values, 24, axis=0),
                          columns=['TD_of_days'])  # the TD_of_days column is each TD repeated 24 times
    map_td = dict(zip(td_count['TD_of_days'],
                      np.arange(1, nbr_td + 1)))  # mapping dictionary from TD_of_days to TD number
    t_h_td['TD_number'] = t_h_td['TD_of_days'].map(map_td)
    t_h_td['H_of_D'] = np.resize(np.arange(1, 25), t_h_td.shape[0])  # hours 1 to 24 repeated 365 times
    t_h_td['H_of_Y'] = np.arange(1, 8761)
    return {'td_of_days': td_of_days, 'td_count': td_count, 't_h_td': t_h_td}


def ampl_syntax(df, comment):
    # renames the last column so that ':=' (and an optional comment) is appended, as required by AMPL table syntax
    df2 = df.copy()
    df2.rename(columns={df2.columns[df2.shape[1] - 1]: str(df2.columns[df2.shape[1] - 1]) + ' ' + ':= ' + comment},
               inplace=True)
    return df2


def print_df(name, df, out_path):
    # appends the dataframe as a tab-separated AMPL table, closed by ';'
    df.to_csv(out_path, sep='\t', mode='a', header=True, index=True, index_label=name, quoting=csv.QUOTE_NONE)
    with open(out_path, mode='a', newline='') as file:
        writer = csv.writer(file, delimiter='\t', quotechar=' ', quoting=csv.QUOTE_MINIMAL)
        writer.writerow([';'])


def newline(out_path):
    # appends an empty line to the output file
    with open(out_path, mode='a', newline='') as file:
        writer = csv.writer(file, delimiter='\t', quotechar=' ', quoting=csv.QUOTE_MINIMAL)
        writer.writerow([''])
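For reference, a sketch of the beginning of the generated .dat file (illustrative TD numbers; the actual values depend on the clustering): the T_H_TD set lists one (hour of the year, hour of the day, TD number) triple per hour, and each time series then follows as a tab-separated AMPL table terminated by ';'.

#SETS [Figure 3]
set T_H_TD :=
( 1 , 1 , 5 )
( 2 , 2 , 5 )
...
;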
Typical Days (python code)
Typical days are a way to represent the variability of energy demand and generation over a year by selecting a few representative days. This approach makes the analysis computationally manageable while still capturing the stochasticity of the energy system (renewable generation, demand, etc.).
Reference: F. Dominguez-Munoz et al., Selection of typical demand days for CHP optimization, Energy and Buildings, 2011.
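To make the idea concrete, here is a toy sketch (not part of the tutorial pipeline): pick k candidate days at random from a synthetic year, assign every day to its nearest candidate, and measure the total squared error; the MILP of section 5 chooses the k days that minimize exactly this quantity.

import numpy as np

rng = np.random.default_rng(0)
days = rng.random((365, 24))  # toy daily profiles: 365 days x 24 hours
k = 12
candidates = rng.choice(365, size=k, replace=False)  # naive random choice (the MILP picks optimally)
dist = ((days[:, None, :] - days[candidates][None, :, :]) ** 2).sum(axis=2)  # squared distance, day x candidate
assignment = candidates[dist.argmin(axis=1)]  # nearest typical day for each day of the year
print('total squared error with', k, 'random typical days:', round(dist.min(axis=1).sum(), 2))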
1. Parameters & Paths
In [6]:
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates
import pandas as pd
import numpy as np
import sys
from amplpy import AMPL
# Parameters & Paths
nbr_td = 12 # Number of representative days
path_td_data = 'tutorial_input/td-generation/'
2. Load & Normalize Time Series
- Read hourly electricity and heating data
- Normalize each series over the year
- Pivot into a daily (365×24) matrix
In [8]:
time_series = pd.read_csv(path_td_data+'Time_series.csv', sep=';', header=0, index_col=0)
ts = time_series.copy()
# renaming columns to match the expected format
ts.rename(columns={'Electricity (%_elec)': 'LIGHTING', 'Space Heating (%_sh)': 'HEAT_LOW_T_SH'}, inplace=True)
ts_names = ts.columns
# normalize the timeseries
ts = ts/ts.sum()
# adding columns for pivoting
ts['Days'] = np.repeat(np.arange(1, 366), 24, axis=0)
ts['H_of_D'] = np.resize(np.arange(1, 25), ts.shape[0])
# pivoting the normalized time series to get the daily matrix n_daily_ts (one row per day, one column per (series, hour))
n_daily_ts = ts.pivot(index='Days', columns='H_of_D', values=ts_names)
n_daily_ts
Out[8]:
n_daily_ts has 365 rows × 216 columns; the columns form a MultiIndex of series × hour of day. The truncated preview below shows LIGHTING (hours 1-10) and Solar (hours 15-24).

| Days | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | ... | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0.000091 | 0.000071 | 0.000053 | 0.000038 | 0.000031 | 0.000030 | 0.000032 | 0.000023 | 0.000024 | 0.000029 | ... | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2 | 0.000066 | 0.000048 | 0.000033 | 0.000024 | 0.000026 | 0.000041 | 0.000071 | 0.000093 | 0.000118 | 0.000135 | ... | 0.000859 | 0.000380 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 3 | 0.000106 | 0.000079 | 0.000068 | 0.000058 | 0.000059 | 0.000073 | 0.000100 | 0.000122 | 0.000145 | 0.000155 | ... | 0.000023 | 0.000008 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 4 | 0.000109 | 0.000086 | 0.000068 | 0.000061 | 0.000058 | 0.000058 | 0.000068 | 0.000076 | 0.000098 | 0.000114 | ... | 0.000052 | 0.000184 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 5 | 0.000095 | 0.000075 | 0.000058 | 0.000051 | 0.000049 | 0.000049 | 0.000057 | 0.000058 | 0.000073 | 0.000086 | ... | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 361 | 0.000105 | 0.000087 | 0.000072 | 0.000061 | 0.000058 | 0.000058 | 0.000065 | 0.000071 | 0.000089 | 0.000107 | ... | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 362 | 0.000111 | 0.000093 | 0.000080 | 0.000067 | 0.000061 | 0.000060 | 0.000066 | 0.000067 | 0.000079 | 0.000097 | ... | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 363 | 0.000115 | 0.000098 | 0.000083 | 0.000073 | 0.000069 | 0.000076 | 0.000098 | 0.000112 | 0.000136 | 0.000148 | ... | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 364 | 0.000113 | 0.000094 | 0.000076 | 0.000066 | 0.000065 | 0.000075 | 0.000097 | 0.000111 | 0.000135 | 0.000145 | ... | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 365 | 0.000110 | 0.000089 | 0.000071 | 0.000062 | 0.000059 | 0.000064 | 0.000082 | 0.000095 | 0.000119 | 0.000131 | ... | 0.000597 | 0.000573 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |

365 rows × 216 columns
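Since each series was normalized over the year in the cell above, a quick check (illustrative) is that the values of each series' 365×24 block sum to 1:

# each normalized series should sum to ~1.0 over the whole year
print(n_daily_ts.sum().groupby(level=0).sum().round(6))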
3. Compute Series Weights
- Scale the normalized time series by yearly demand and production potentials
- Weight heating by a 0.204 energy-quality conversion factor relative to electricity
- Normalize so that demand and supply each contribute half of the total weight
In [9]:
tot_ts = time_series.copy().sum(axis=0)
demand = pd.read_csv(path_td_data+'Demand.csv', sep=';', index_col=2, header=0)
technologies = pd.read_csv(path_td_data+'Technologies.csv', sep=';', index_col=3, header=0, skiprows=[1])
technologies.index = technologies.index.str.strip()
###### THE FOLLOWING 4 LINES MIGHT NEED TO BE ADAPTED ######
demand_ts = ['LIGHTING', 'HEAT_LOW_T_SH']  # end-use demands (EUD) considered for the clustering
prod_ts = ['PV', 'Wind_onshore', 'Wind_offshore', 'Hydro_river']  # production technologies considered for the clustering
prod_ts2 = ['PV', 'WIND_ONSHORE', 'WIND_OFFSHORE', 'HYDRO_RIVER']
tot_ts.rename({'Electricity (%_elec)': 'LIGHTING', 'Space Heating (%_sh)': 'HEAT_LOW_T_SH'}, inplace=True)
# multiply demand time series sum by the year consumption
tot_ts[demand_ts] = tot_ts[demand_ts] * demand.loc[demand_ts, :].sum(axis=1, numeric_only=True).values
# Weight the heating time series by a conversion coefficient to account for the difference in energy quality compared to the electricity time series
tot_ts.loc['HEAT_LOW_T_SH'] *= 0.204
# multiply the sum of the production time series by the maximum potential (f_max in GW) of the corresponding technologies
tot_ts[prod_ts] = tot_ts[prod_ts] * technologies.loc[prod_ts2, 'f_max'].values
tot_ts.loc[~tot_ts.index.isin(demand_ts+prod_ts)] = np.nan
tot_ts
Out[9]:
LIGHTING                         31850.621549
HEAT_LOW_T_SH                    18795.713182
Passanger mobility (%_pass)               NaN
Freight mobility (%_freight)              NaN
PV                               61560.846856
Wind_onshore                     21309.163848
Wind_offshore                    21655.949652
Hydro_river                        487.581600
Solar                                     NaN
dtype: float64
4. Build Weighted Data Matrix
- Multiply each daily profile by its series weight
- Reshape into a DIMENSIONS×DAYS matrix for clustering
In [10]:
weights = pd.DataFrame()
# Add Cell_w to the weights data frame
weights['Cell_w'] = tot_ts
# normalize the weights
demand_total = weights.loc[demand_ts, 'Cell_w'].sum()
prod_total = weights.loc[prod_ts, 'Cell_w'].sum()
weights['Weights_n'] = weights['Cell_w']
weights.loc[weights['Weights_n'] < 0.001, 'Weights_n'] = np.nan  # discard series with negligible weight
# Repartition of the weight between demand and production (1:1)
weights.loc[demand_ts, 'Weights_n'] = weights.loc[demand_ts, 'Weights_n'] / demand_total / 2
weights.loc[prod_ts, 'Weights_n'] = weights.loc[prod_ts, 'Weights_n'] / prod_total / 2
# Weights can be modified here
weights
Out[10]:
| | Cell_w | Weights_n |
|---|---|---|
| LIGHTING | 31850.621549 | 0.314442 |
| HEAT_LOW_T_SH | 18795.713182 | 0.185558 |
| Passanger mobility (%_pass) | NaN | NaN |
| Freight mobility (%_freight) | NaN | NaN |
| PV | 61560.846856 | 0.293109 |
| Wind_onshore | 21309.163848 | 0.101459 |
| Wind_offshore | 21655.949652 | 0.103110 |
| Hydro_river | 487.581600 | 0.002322 |
| Solar | NaN | NaN |
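By construction, the demand weights and the production weights should each sum to 0.5, so that demand and supply contribute equally to the clustering distance — a quick check (illustrative):

print(weights.loc[demand_ts, 'Weights_n'].sum())  # expected ~0.5
print(weights.loc[prod_ts, 'Weights_n'].sum())    # expected ~0.5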
In [11]:
# use numpy broadcasting to multiply each time series by its weight
m, n = map(len, n_daily_ts.transpose().index.levels)
a0 = weights.loc[:, 'Weights_n'].values.reshape(m, -1)
a1 = n_daily_ts.transpose().values.reshape(m, n, -1)
out = (a1 * a0[..., None, :]).reshape(-1, a1.shape[-1])
n_data = pd.DataFrame(out, index=n_daily_ts.transpose().index, columns=n_daily_ts.transpose().columns)
# drop ts without weight
n_data.dropna(axis=0, how='any', inplace=True)
n_data
Out[11]:
n_data has 144 rows × 365 columns; the rows form a MultiIndex of series × hour of day. The truncated preview shows LIGHTING (hours 1-5) and Hydro_river (hours 20-24), for days 1-10 and 356-365.

| Series | H_of_D | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | ... | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| LIGHTING | 1 | 2.867714e-05 | 2.087897e-05 | 3.322051e-05 | 3.439276e-05 | 2.993491e-05 | 2.807970e-05 | 3.662844e-05 | 3.955275e-05 | 3.888834e-05 | 4.158311e-05 | ... | 2.874003e-05 | 3.241083e-05 | 3.106690e-05 | 2.987202e-05 | 2.540694e-05 | 3.305134e-05 | 3.481568e-05 | 3.603635e-05 | 3.556500e-05 | 3.461035e-05 |
| LIGHTING | 2 | 2.245118e-05 | 1.499890e-05 | 2.487239e-05 | 2.701059e-05 | 2.345740e-05 | 2.260840e-05 | 2.977769e-05 | 3.202406e-05 | 3.084679e-05 | 3.427201e-05 | ... | 2.358317e-05 | 2.452650e-05 | 2.301718e-05 | 2.276562e-05 | 1.974698e-05 | 2.748226e-05 | 2.924313e-05 | 3.094112e-05 | 2.943180e-05 | 2.795392e-05 |
| LIGHTING | 3 | 1.660255e-05 | 1.021937e-05 | 2.131919e-05 | 2.138208e-05 | 1.823765e-05 | 1.855210e-05 | 2.518683e-05 | 2.613016e-05 | 2.647604e-05 | 3.002924e-05 | ... | 1.874076e-05 | 1.952687e-05 | 1.732577e-05 | 1.603656e-05 | 1.506179e-05 | 2.251407e-05 | 2.499816e-05 | 2.609871e-05 | 2.377184e-05 | 2.235685e-05 |
| LIGHTING | 4 | 1.182303e-05 | 7.483727e-06 | 1.817476e-05 | 1.908665e-05 | 1.603656e-05 | 1.553345e-05 | 2.330017e-05 | 2.465228e-05 | 2.471516e-05 | 2.889725e-05 | ... | 1.606800e-05 | 1.751444e-05 | 1.487312e-05 | 1.185447e-05 | 1.229469e-05 | 1.930676e-05 | 2.100475e-05 | 2.289140e-05 | 2.084752e-05 | 1.962120e-05 |
| LIGHTING | 5 | 9.747711e-06 | 8.081167e-06 | 1.855210e-05 | 1.814332e-05 | 1.550201e-05 | 1.660255e-05 | 2.518683e-05 | 2.760803e-05 | 2.581571e-05 | 3.065812e-05 | ... | 1.697988e-05 | 1.773455e-05 | 1.477879e-05 | 1.053382e-05 | 1.245192e-05 | 1.826910e-05 | 1.911809e-05 | 2.179085e-05 | 2.040731e-05 | 1.842632e-05 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| Hydro_river | 20 | 1.990114e-07 | 1.957976e-07 | 1.951026e-07 | 1.950242e-07 | 1.949406e-07 | 1.948622e-07 | 1.947786e-07 | 1.946950e-07 | 1.946271e-07 | 1.929182e-07 | ... | 1.609472e-07 | 1.600693e-07 | 1.591862e-07 | 1.583030e-07 | 1.574094e-07 | 1.565263e-07 | 1.568816e-07 | 1.575192e-07 | 1.581671e-07 | 1.588047e-07 |
| Hydro_river | 21 | 1.988756e-07 | 1.956617e-07 | 1.951026e-07 | 1.950242e-07 | 1.949406e-07 | 1.948622e-07 | 1.947786e-07 | 1.946950e-07 | 1.946166e-07 | 1.928399e-07 | ... | 1.609054e-07 | 1.600275e-07 | 1.591444e-07 | 1.582612e-07 | 1.573833e-07 | 1.565001e-07 | 1.569077e-07 | 1.575453e-07 | 1.581933e-07 | 1.588308e-07 |
| Hydro_river | 22 | 1.987397e-07 | 1.955259e-07 | 1.951026e-07 | 1.950242e-07 | 1.949406e-07 | 1.948622e-07 | 1.947786e-07 | 1.946950e-07 | 1.946166e-07 | 1.927458e-07 | ... | 1.608793e-07 | 1.600014e-07 | 1.591025e-07 | 1.582246e-07 | 1.573415e-07 | 1.564583e-07 | 1.569339e-07 | 1.575714e-07 | 1.582246e-07 | 1.588622e-07 |
| Hydro_river | 23 | 1.986038e-07 | 1.953900e-07 | 1.950921e-07 | 1.950085e-07 | 1.949406e-07 | 1.948622e-07 | 1.947786e-07 | 1.946950e-07 | 1.946166e-07 | 1.926622e-07 | ... | 1.608375e-07 | 1.599596e-07 | 1.590764e-07 | 1.581933e-07 | 1.572997e-07 | 1.564217e-07 | 1.569600e-07 | 1.575975e-07 | 1.582508e-07 | 1.588883e-07 |
| Hydro_river | 24 | 1.984680e-07 | 1.952541e-07 | 1.950921e-07 | 1.950085e-07 | 1.949301e-07 | 1.948465e-07 | 1.947629e-07 | 1.946950e-07 | 1.946166e-07 | 1.925681e-07 | ... | 1.608009e-07 | 1.599178e-07 | 1.590346e-07 | 1.581567e-07 | 1.572735e-07 | 1.563799e-07 | 1.569914e-07 | 1.576237e-07 | 1.582769e-07 | 1.589144e-07 |

144 rows × 365 columns
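The numpy broadcasting above is compact but opaque; an equivalent, label-aligned pandas version (a sketch for intuition, multiplying on the first level of the row MultiIndex, i.e. the series name) is:

# equivalent to the broadcasting cell: weight each series block by Weights_n
n_data_check = n_daily_ts.transpose().mul(weights['Weights_n'], axis=0, level=0)
n_data_check = n_data_check.dropna(axis=0, how='any')
print(np.allclose(n_data_check.values, n_data.values))  # expected True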
5. MILP Clustering of Typical Days
- Define an AMPL model with binary selection & assignment variables
- Minimize the total squared Euclidean distance between each day and its typical day
- Solve with CPLEX
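In compact form, the model below (from Dominguez-Munoz et al.) chooses binaries y_i (day i is selected as a typical day) and z_ij (day j is represented by day i):

minimize    sum_{i,j} D(i,j) * z_ij
subject to  sum_i z_ij = 1   for every day j,
            z_ij <= y_i,
            sum_i y_i = Nbr_TD,

with D(i,j) = sum_k (Ndata[i,k] - Ndata[j,k])^2 the squared Euclidean distance between the weighted profiles of days i and j.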
In [12]:
cplex_options = ['mipdisplay=5',
                 'mipinterval=1000',
                 'mipgap=1e-6']
cplex_options_str = ' '.join(cplex_options)
options = {'show_stats': 3,
           'times': 1,
           'gentimes': 1,
           'solver': 'cplex',
           'cplex_options': cplex_options_str}
td_model = AMPL()
td_model.setOption('solver', 'cplex')
td_model.setOption('cplex_options', cplex_options_str)
td_model.eval(r"""
#----------------------------------------------------------------------------------
# TYPICAL DAY SELECTION
# from F. Dominguez-Munoz et al., Selection of typical demand days for CHP optimization, 2011
#----------------------------------------------------------------------------------
############################
### MILP formulation ###
############################
set DIMENSIONS ; # Number of input data per day (24h x nbr of time series)
set DAYS := 1 .. 365; # Number of days
### parameters
param Nbr_TD default 12; #Number of TD days
param Ndata{DAYS,DIMENSIONS}; #Input data (already normalized)
param Distance{i in DAYS,j in DAYS} := sum{k in DIMENSIONS}((Ndata[i,k]-Ndata[j,k])*(Ndata[i,k]-Ndata[j,k])) ; # Distance matrix.
### Variables
var Selected_TD {DAYS} binary;# default 0; #which are the typical days
var Cluster_matrix {DAYS,DAYS} binary;# default 0; #which day corresponds to which typical day
### Constraints
# Allocate one cluster centre (i) to each day (j)
subject to allocate_1TD_per_day{j in DAYS}:
sum{i in DAYS} Cluster_matrix[i,j] = 1;
# If cluster not allocated, it needs to be null
subject to other_TD_null {i in DAYS,j in DAYS}:
Cluster_matrix[i,j] <= Selected_TD[i];
# Limit the number of TD
subject to limit_number_of_TD:
sum{i in DAYS} Selected_TD[i] = Nbr_TD;
#-Objective
minimize Euclidean_distance:
sum{i in DAYS,j in DAYS} Distance[i,j]*Cluster_matrix[i,j];
""")
td_model.param['Nbr_TD'] = nbr_td
# drop the series level of the row index; after transposing, the rows are the DAYS and the columns the DIMENSIONS
n_data.index = n_data.index.get_level_values(1)
# ensure the columns (days) are integers for DAYS
n_data.columns = n_data.columns.astype(int)
n_data = n_data.transpose()
# td_model.set['DAYS'] = list(n_data.index.unique())
td_model.set['DIMENSIONS'] = list(range(1, len(n_data.columns) + 1))  # 24 hours x 6 weighted series = 144
n_data.columns = list(range(1, len(n_data.columns) + 1))
# convert to long format and assign to the Ndata parameter
td_model.param['Ndata'] = n_data.stack()
td_model.solve()
CPLEX 22.1.1.0: mipdisplay=5
mipinterval=1000
mipgap=1e-6
Reduced MIP has 133591 rows, 133590 columns, and 400040 nonzeros.
Reduced MIP has 133590 binaries, 0 generals, 0 SOSs, and 0 indicators.
Found incumbent of value 0.000035 after 1.50 sec. (3201.01 ticks)
Probing time = 0.24 sec. (39.40 ticks)
Detecting symmetries...
Reduced MIP has 133591 rows, 133590 columns, and 400040 nonzeros.
Reduced MIP has 133590 binaries, 0 generals, 0 SOSs, and 0 indicators.
Probing time = 0.22 sec. (39.41 ticks)
Clique table members: 133590.
MIP emphasis: balance optimality and feasibility.
MIP search method: dynamic search.
Parallel mode: deterministic, using up to 10 threads.
Parallel mode: deterministic, using up to 2 threads for parallel tasks at root LP.
Root relaxation solution time = 0.17 sec. (151.81 ticks)

        Nodes                                         Cuts/
   Node  Left     Objective  IInf  Best Integer    Best Bound    ItCnt     Gap

*     0+    0                            0.0000        0.0000           100.00%
Found incumbent of value 0.000035 after 3.10 sec. (5225.73 ticks)
*     0+    0                            0.0000        0.0000           100.00%
Found incumbent of value 0.000008 after 3.11 sec. (5227.26 ticks)
*     0+    0                            0.0000        0.0000           100.00%
Found incumbent of value 0.000007 after 3.11 sec. (5227.77 ticks)
      0     0        cutoff              0.0000        0.0000       16  -305.10%
      0     0        cutoff              0.0000        0.0000       16  -305.10%
Elapsed time = 3.20 sec. (5301.88 ticks, tree = 0.01 MB)

Root node processing (before b&c):
  Real time             =    3.21 sec. (5303.66 ticks)
Parallel b&c, 10 threads:
  Real time             =    0.00 sec. (0.00 ticks)
  Sync time (average)   =    0.00 sec.
  Wait time (average)   =    0.00 sec.
                          ------------
Total (root+branch&cut) =    3.21 sec. (5303.66 ticks)

CPLEX 22.1.1.0: optimal integer solution; objective 7.369768048e-06
16 MIP simplex iterations
0 branch-and-bound nodes
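Without a CPLEX licence, a heuristic k-medoids clustering optimizes a closely related objective and typically yields similar typical days — a minimal sketch, assuming the scikit-learn-extra package is installed (not part of the original tutorial):

# heuristic alternative to the exact MILP: k-medoids on the weighted day profiles
from sklearn_extra.cluster import KMedoids

km = KMedoids(n_clusters=nbr_td, metric='euclidean', random_state=0).fit(n_data.values)
print(sorted(n_data.index[km.medoid_indices_]))  # candidate typical days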
6. Extract Clusters & Export DAT
- Retrieve the assignments and save the .dat file for EnergyScope
In [13]:
cluster_matrix = td_model.getVariable('Cluster_matrix').getValues().toPandas()
cluster_matrix = cluster_matrix[cluster_matrix.sum(axis=1) > 0]  # keep only the (TD, day) pairs selected by the MILP
# flatten: keep the day of the year as the only index and the typical day as a new column
cluster_matrix = cluster_matrix.reset_index()
cluster_matrix = cluster_matrix.drop(columns=['Cluster_matrix.val'])
cluster_matrix.rename(columns={'index0': 'TypicalDay'}, inplace=True)
# set the day of the year (index1) as the index
cluster_matrix.set_index('index1', inplace=True)
cluster_matrix.sort_index(inplace=True)
td_dat_file_generation(time_series, cluster_matrix, nbr_td, 'tutorial_output/'+'ESTD_' + str(nbr_td) + 'TD.dat')
print(cluster_matrix['TypicalDay'].unique())
[ 47 6 48 52 337 42 110 108 122 104 134 207]
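A quick sanity check (illustrative, not part of the original tutorial): every day of the year should be assigned to exactly one typical day, with nbr_td distinct typical days covering all 365 days:

counts = cluster_matrix['TypicalDay'].value_counts()
assert len(counts) == nbr_td and counts.sum() == 365
print(counts.sort_index())  # number of days represented by each typical day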
7. Visualization of Typical Days
- Scatter plot assignments over the calendar year
In [14]:
# Plotting of the typical days through the year
# Convert the index to datetime for better plotting
cluster_matrix.index = pd.to_datetime(cluster_matrix.index, format='%j') # %j is the day of the year
# Create a new DataFrame for plotting
td_plot = cluster_matrix.reset_index()
td_plot.rename(columns={'index1': 'DayOfYear'}, inplace=True)
td_plot['DayOfYear'] = pd.to_datetime(td_plot['DayOfYear'], format='%j')
# Plotting
plt.figure(figsize=(12, 6))
sns.scatterplot(data=td_plot, x='DayOfYear', y='TypicalDay', hue='TypicalDay', palette='viridis', s=100, legend=False)
plt.title('Typical Days Assigned to Each Day of the Year')
plt.xlabel('Day of the Year')
plt.ylabel('Typical Day')
plt.xticks(rotation=45)
plt.gca().xaxis.set_major_locator(mdates.MonthLocator())
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b'))
plt.grid(True)
plt.tight_layout()
plt.show()
In [15]:
# Plotting the different time series for each feature
# Get the list of representative (cluster-centre) days
typical_days = sorted(cluster_matrix['TypicalDay'].unique())
# Features available in the normalized daily pivot (e.g., LIGHTING, HEAT_LOW_T_SH)
features = n_daily_ts.columns.levels[0]
# For each feature, overlay all typical-day profiles
for var in features:
    plt.figure(figsize=(10, 4))
    for day in typical_days:
        # Extract the hour 1-24 profile for this feature on the centre day
        profile = n_daily_ts.loc[day][var]
        plt.plot(profile.index, profile.values, label=f'Day {day}')
    plt.title(f'{var} Profile for Typical Days')
    plt.xlabel('Hour of Day')
    plt.ylabel(var)
    plt.grid(True)
    plt.legend(ncol=3, fontsize='small')
    plt.tight_layout()
    plt.show()