Source code for supernnova.visualization.visualize

import os
from pathlib import Path
import h5py
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

# Select the non-interactive "agg" backend at import time: every function in
# this module writes its figure to disk with savefig, none of them shows a window.
plt.switch_backend("agg")


def plot_lightcurves(df, SNIDs, settings):
    """Plot a 4x4 grid of light curves and save it to disk.

    One panel is drawn per light curve, with one error-bar series per filter.
    The time axis of each panel is the running sum of ``delta_time`` over the
    rows of that light curve. The figure is written to
    ``settings.explore_dir / "sample_lightcurves.png"`` and then closed.

    At most 16 light curves (the capacity of the grid) are drawn; if fewer
    are supplied the remaining panels are simply left empty.

    Args:
        df (pandas.DataFrame): dataframe holding the data, indexed by SNID,
            with ``FLT``, ``FLUXCAL``, ``FLUXCALERR`` and ``delta_time`` columns
        SNIDs (np.array or list): holds lightcurve ids
        settings (ExperimentSettings): controls experiment hyperparameters
            (``list_filters`` and ``explore_dir`` are read here)

    Raises:
        KeyError: if an SNID is missing from ``df`` or a row carries a filter
            that is not listed in ``settings.list_filters``
    """
    n_rows, n_cols = 4, 4

    fig = plt.figure(figsize=(20, 10))
    try:
        fig.suptitle("Sample of SN Ia light curves")
        gs = gridspec.GridSpec(n_rows, n_cols, hspace=0.4)

        # Never index past the grid, and tolerate fewer ids than panels
        for panel, SNID in enumerate(SNIDs[: n_rows * n_cols]):
            ax = fig.add_subplot(gs[panel])

            # List-based selection always yields a DataFrame, even when the
            # light curve consists of a single row
            df_temp = df.loc[[SNID]]

            # Prepare plotting data in a dict, one entry per known filter
            d = {
                flt: {"FLUXCAL": [], "FLUXCALERR": [], "MJD": []}
                for flt in settings.list_filters
            }

            current_time = 0
            rows = zip(
                df_temp["FLT"].values,
                df_temp["FLUXCAL"].values,
                df_temp["FLUXCALERR"].values,
                df_temp["delta_time"].values,
            )
            for flt, flux, fluxerr, delta_time in rows:
                # Time is accumulated over all rows, whatever their filter
                current_time += delta_time
                d[flt]["FLUXCAL"].append(flux)
                d[flt]["FLUXCALERR"].append(fluxerr)
                d[flt]["MJD"].append(current_time)

            for flt, series in d.items():
                # Only plot a time series if it's non empty
                if not series["MJD"]:
                    continue
                ax.errorbar(
                    series["MJD"],
                    series["FLUXCAL"],
                    yerr=series["FLUXCALERR"],
                    label=f"Filter {flt}",
                )

            ax.set_title(SNID, fontsize=18)
            ax.set_aspect("auto")
            ax.legend(loc="best")

        fig.savefig(Path(settings.explore_dir) / "sample_lightcurves.png")
    finally:
        # pyplot keeps every figure alive until it is closed explicitly
        plt.close(fig)
def plot_random_preprocessed_lightcurves(settings, SNIDs):
    """Plot the requested lightcurves from the preprocessed, pickled database

    Every ``*_PHOT.pickle`` file found in ``settings.preprocessed_dir`` is
    loaded, the frames are concatenated and indexed by ``SNID``, and the
    result is handed to ``plot_lightcurves``.

    Args:
        settings (ExperimentSettings): controls experiment hyperparameters
        SNIDs (list): list of SN lightcurve IDs to plot
    """
    pattern = os.path.join(settings.preprocessed_dir, "*_PHOT.pickle")

    # NOTE: unpickling can execute arbitrary code, so preprocessed_dir must
    # only ever contain files produced by a trusted preprocessing run
    frames = [pd.read_pickle(pickle_path) for pickle_path in glob.glob(pattern)]
    df = pd.concat(frames).set_index("SNID")

    # Plot and save
    plot_lightcurves(df, SNIDs, settings)
def plot_lightcurves_from_hdf5(settings, SNID_idxs):
    """Plot lightcurves specified by SNID_idxs from the HDF5 database

    One panel is drawn per index on a 4x4 grid, with one error-bar series per
    filter and a dashed vertical line at ``PEAKMJDNORM``. The figure is saved
    to ``settings.explore_dir / "sample_lightcurves_from_hdf5.png"`` and then
    closed.

    At most 16 indices (the capacity of the grid) are plotted.

    Args:
        settings (ExperimentSettings): controls experiment hyperparameters
            (``hdf5_file_name``, ``sntype_var``, ``sntypes``, ``list_filters``
            and ``explore_dir`` are read here)
        SNID_idxs (list): list of SN lightcurve index to plot
    """
    n_rows, n_cols = 4, 4

    # Feature columns that do not encode which filters are present
    non_filter_columns = {
        "FLUXCAL_g",
        "FLUXCAL_i",
        "FLUXCAL_r",
        "FLUXCAL_z",
        "FLUXCALERR_g",
        "FLUXCALERR_i",
        "FLUXCALERR_r",
        "FLUXCALERR_z",
        "delta_time",
        "HOSTGAL_PHOTOZ",
        "HOSTGAL_PHOTOZ_ERR",
        "HOSTGAL_SPECZ",
        "HOSTGAL_SPECZ_ERR",
    }

    with h5py.File(settings.hdf5_file_name, "r") as hf:
        features = hf["features"][:].astype(str)
        n_features = len(features)

        fig = plt.figure(figsize=(20, 10))
        try:
            gs = gridspec.GridSpec(n_rows, n_cols, hspace=0.4)

            # Never index past the grid
            for idx, SNID_idx in enumerate(SNID_idxs[: n_rows * n_cols]):
                ax = fig.add_subplot(gs[idx])

                SNID = hf["SNID"][SNID_idx]
                if isinstance(SNID, bytes):
                    SNID = SNID.decode("utf8")
                PEAKMJDNORM = hf["PEAKMJDNORM"][SNID_idx]
                typ = settings.sntypes[str(hf[settings.sntype_var][SNID_idx])]

                # The light curve is stored flat: restore (time step, feature)
                data = hf["data"][SNID_idx].reshape(-1, n_features)
                df = pd.DataFrame(data, columns=features)

                filter_columns = [
                    c for c in df.columns.values if c not in non_filter_columns
                ]
                # For every time step, name of the filter column with the
                # largest value
                present_filters = df[filter_columns].idxmax(axis=1).values
                # Each column name is broken into its characters, so the
                # membership test below is per character.
                # NOTE(review): assumes single-character filter names -- confirm
                list_present_filters = [set(f) for f in present_filters]

                # Same time axis for every filter: compute it once
                arr_time_all = df["delta_time"].cumsum().values

                max_y = -float("Inf")
                min_y = float("Inf")

                for FLT in settings.list_filters:
                    idxs = np.array(
                        [
                            i
                            for i, present in enumerate(list_present_filters)
                            if FLT in present
                        ]
                    )
                    if len(idxs) == 0:
                        continue

                    arr_flux = df[f"FLUXCAL_{FLT}"].values[idxs]
                    arr_fluxerr = df[f"FLUXCALERR_{FLT}"].values[idxs]
                    arr_time = arr_time_all[idxs]
                    ax.errorbar(
                        arr_time, arr_flux, yerr=arr_fluxerr, label=f"Filter {FLT}"
                    )

                    # Track the overall flux range to size the peak marker
                    max_y = max(max_y, np.max(arr_flux))
                    min_y = min(min_y, np.min(arr_flux))

                # Skip the peak marker when nothing was drawn: its bounds
                # would still be +/- infinity
                if np.isfinite(min_y) and np.isfinite(max_y):
                    ax.plot(
                        [PEAKMJDNORM, PEAKMJDNORM],
                        [min_y, max_y],
                        color="k",
                        linestyle="--",
                    )

                ax.set_title(f"{SNID} -- {typ}", fontsize=18)
                ax.set_aspect("auto")
                ax.legend(loc="best")

            fig.savefig(
                Path(settings.explore_dir) / "sample_lightcurves_from_hdf5.png"
            )
        finally:
            # pyplot keeps every figure alive until it is closed explicitly
            plt.close(fig)
def visualize(settings):
    """Plot a random subset of lightcurves

    Two figures are produced for the same random draw of up to 16 light
    curves: one built from the preprocessed (pickled) data and one built from
    the processed HDF5 data. The two plots should show the same data.

    Args:
        settings (ExperimentSettings): controls experiment hyperparameters
    """
    # Check the data has been created
    settings.check_data_exists()

    # Re-seed NumPy's global generator without a fixed value
    np.random.seed()

    with h5py.File(settings.hdf5_file_name, "r") as hf:
        n_lightcurves = hf["SNID"].shape[0]
        SNID_idxs = np.random.permutation(n_lightcurves)[:16]
        # Load every id first, then pick the drawn positions in memory
        all_snids = hf["SNID"][:]
        drawn_snids = all_snids[SNID_idxs]

    # Rebuild the array from a plain list before casting the ids to str
    SNIDs = list(np.array(list(drawn_snids)).astype(str))

    plot_random_preprocessed_lightcurves(settings, SNIDs)
    plot_lightcurves_from_hdf5(settings, SNID_idxs)