import os
from pathlib import Path
import h5py
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
plt.switch_backend("agg")
def plot_lightcurves(df, SNIDs, settings):
    """Plot a 4x4 grid of light curves and save it as a PNG.

    At most 16 light curves are drawn (one per grid cell). If fewer ids are
    given, the remaining cells are simply left empty.

    The figure is written to ``<settings.explore_dir>/sample_lightcurves.png``
    and closed afterwards.

    Args:
        df (pandas.DataFrame): dataframe holding the data. It is looked up by
            lightcurve id through ``df.loc`` and must provide the ``FLT``,
            ``FLUXCAL``, ``FLUXCALERR`` and ``delta_time`` columns
        SNIDs (np.array or list): holds lightcurve ids
        settings (ExperimentSettings): controls experiment hyperparameters
            (``list_filters`` and ``explore_dir`` are read here)

    Raises:
        KeyError: if an id is missing from ``df`` or a row uses a filter that
            is not listed in ``settings.list_filters``
    """
    n_rows, n_cols = 4, 4

    fig = plt.figure(figsize=(20, 10))
    plt.suptitle("Sample of SN Ia light curves")
    gs = gridspec.GridSpec(n_rows, n_cols, hspace=0.4)

    # zip stops at the shorter input, so fewer than 16 ids no longer raises
    # an IndexError and extra ids beyond the grid capacity are ignored
    for cell, SNID in zip(range(n_rows * n_cols), SNIDs):
        ax = plt.subplot(gs[cell])

        # Selecting with a list always yields a DataFrame, even when the
        # lightcurve has a single row (a scalar label would give a Series)
        df_temp = df.loc[[SNID]]

        arr_flt = df_temp.FLT.values
        arr_flux = df_temp.FLUXCAL.values
        arr_fluxerr = df_temp.FLUXCALERR.values
        # delta_time is the step between consecutive rows: the running sum
        # gives the time of each row relative to the start of the lightcurve
        arr_time = np.cumsum(df_temp.delta_time.values)

        # Prepare plotting data in a dict, one time series per filter
        d = {
            flt: {"FLUXCAL": [], "FLUXCALERR": [], "MJD": []}
            for flt in settings.list_filters
        }
        for flt, flux, fluxerr, time in zip(
            arr_flt, arr_flux, arr_fluxerr, arr_time
        ):
            d[flt]["FLUXCAL"].append(flux)
            d[flt]["FLUXCALERR"].append(fluxerr)
            d[flt]["MJD"].append(time)

        for flt, series in d.items():
            # Only plot a time series if it's non empty
            if series["MJD"]:
                ax.errorbar(
                    series["MJD"],
                    series["FLUXCAL"],
                    yerr=series["FLUXCALERR"],
                    label=f"Filter {flt}",
                )

        ax.set_title(SNID, fontsize=18)
        ax.set_aspect("auto")
        ax.legend(loc="best")

    plt.savefig(Path(settings.explore_dir) / "sample_lightcurves.png")
    # Release the figure so repeated calls do not accumulate open figures
    plt.close(fig)
def plot_random_preprocessed_lightcurves(settings, SNIDs):
    """Plot the lightcurves identified by ``SNIDs`` from the
    preprocessed, pickled database

    Every ``*_PHOT.pickle`` file found in ``settings.preprocessed_dir`` is
    loaded, the resulting frames are concatenated and indexed by ``SNID``,
    and the plotting itself is delegated to ``plot_lightcurves``.

    Args:
        settings (ExperimentSettings): controls experiment hyperparameters
        SNIDs (list): list of SN lightcurve IDs to plot
    """
    pickle_pattern = os.path.join(settings.preprocessed_dir, "*_PHOT.pickle")

    # One dataframe per pickled photometry file
    frames = [pd.read_pickle(path) for path in glob.glob(pickle_pattern)]
    photometry = pd.concat(frames).set_index("SNID")

    # Plot and save
    plot_lightcurves(photometry, SNIDs, settings)
def plot_lightcurves_from_hdf5(settings, SNID_idxs):
    """Plot lightcurves specified by SNID_idxs from the
    HDF5 database

    Lightcurves are drawn on a 4x4 grid, so at most 16 indices are plotted.
    For each one, the flux of every filter in ``settings.list_filters`` is
    plotted against the cumulative ``delta_time``, and a dashed vertical line
    marks ``PEAKMJDNORM``. The figure is written to
    ``<settings.explore_dir>/sample_lightcurves_from_hdf5.png`` and closed
    afterwards.

    Args:
        settings (ExperimentSettings): controls experiment hyperparameters
            (``hdf5_file_name``, ``sntype_var``, ``sntypes``, ``list_filters``
            and ``explore_dir`` are read here)
        SNID_idxs (list): list of SN lightcurve index to plot
    """
    n_rows, n_cols = 4, 4

    # Feature columns that are NOT filter-indicator columns. The g/i/r/z
    # names are kept for backward compatibility; names derived from
    # settings.list_filters make this work for other filter sets too.
    known_filters = ["g", "i", "r", "z", *settings.list_filters]
    non_filter_columns = {
        f"{prefix}_{flt}"
        for prefix in ("FLUXCAL", "FLUXCALERR")
        for flt in known_filters
    }
    non_filter_columns.update(
        [
            "delta_time",
            "HOSTGAL_PHOTOZ",
            "HOSTGAL_PHOTOZ_ERR",
            "HOSTGAL_SPECZ",
            "HOSTGAL_SPECZ_ERR",
        ]
    )

    fig = plt.figure(figsize=(20, 10))
    gs = gridspec.GridSpec(n_rows, n_cols, hspace=0.4)

    with h5py.File(settings.hdf5_file_name, "r") as hf:
        features = hf["features"][:].astype(str)
        n_features = len(features)
        # Same for every lightcurve, so computed once outside the loop
        filter_columns = [c for c in features if c not in non_filter_columns]

        # zip caps the loop at the number of grid cells
        for cell, SNID_idx in zip(range(n_rows * n_cols), SNID_idxs):
            ax = plt.subplot(gs[cell])

            SNID = hf["SNID"][SNID_idx]
            PEAKMJDNORM = hf["PEAKMJDNORM"][SNID_idx]
            typ = settings.sntypes[str(hf[settings.sntype_var][SNID_idx])]

            # Stored flat: one row per time step once reshaped
            data = hf["data"][SNID_idx].reshape(-1, n_features)
            df = pd.DataFrame(data, columns=features)

            # For each row, the filter-indicator column with the largest
            # value; the characters of its name are matched against the
            # filters below
            present_filters = df[filter_columns].idxmax(axis=1).values
            list_present_filters = [set(f) for f in present_filters]

            arr_time_all = df["delta_time"].cumsum().values

            max_y = -float("Inf")
            min_y = float("Inf")
            for FLT in settings.list_filters:
                mask = np.array(
                    [FLT in present for present in list_present_filters],
                    dtype=bool,
                )
                if not mask.any():
                    continue

                arr_flux = df[f"FLUXCAL_{FLT}"].values[mask]
                arr_fluxerr = df[f"FLUXCALERR_{FLT}"].values[mask]
                arr_time = arr_time_all[mask]
                ax.errorbar(
                    arr_time, arr_flux, yerr=arr_fluxerr, label=f"Filter {FLT}"
                )

                max_y = max(max_y, np.max(arr_flux))
                min_y = min(min_y, np.min(arr_flux))

            # Only mark the peak when some flux was plotted; otherwise the
            # limits are still +/-inf and the line would be meaningless
            if min_y <= max_y:
                ax.plot(
                    [PEAKMJDNORM, PEAKMJDNORM],
                    [min_y, max_y],
                    color="k",
                    linestyle="--",
                )

            # The id may come back as bytes or str
            snid_label = SNID.decode("utf8") if isinstance(SNID, bytes) else str(SNID)
            ax.set_title(f"{snid_label} -- {typ}", fontsize=18)
            ax.set_aspect("auto")
            ax.legend(loc="best")

    plt.savefig(Path(settings.explore_dir) / "sample_lightcurves_from_hdf5.png")
    # Release the figure so repeated calls do not accumulate open figures
    plt.close(fig)
def visualize(settings):
    """Plot a random subset of lightcurves

    Up to 16 lightcurves are drawn at random from the HDF5 database and
    plotted twice: once from the preprocessed (pickled) data and once from
    the processed (HDF5) data. The two plots should show the same data.

    Args:
        settings (ExperimentSettings): controls experiment hyperparameters
    """
    # Check the data has been created
    settings.check_data_exists()

    # Re-seed NumPy's global random generator (no explicit seed is given)
    np.random.seed()

    with h5py.File(settings.hdf5_file_name, "r") as hf:
        n_lightcurves = hf["SNID"].shape[0]
        SNID_idxs = np.random.permutation(n_lightcurves)[:16]
        all_snids = hf["SNID"][:]

    # Rebuild the array from a plain list before converting to str
    picked_snids = all_snids[SNID_idxs]
    SNIDs = list(np.array(list(picked_snids)).astype(str))

    plot_random_preprocessed_lightcurves(settings, SNIDs)
    plot_lightcurves_from_hdf5(settings, SNID_idxs)