Source code for supernnova.visualization.visualize

import os
from pathlib import Path
import h5py
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec


def plot_lightcurves(df, SNIDs, settings):
    """Plot a 4x4 grid of light curves and save it as a PNG.

    One panel is drawn per entry of ``SNIDs`` (at most 16, the capacity of
    the grid). In each panel, every filter of ``settings.list_filters`` that
    has at least one observation is drawn as an error-bar series of
    ``FLUXCAL`` +/- ``FLUXCALERR`` against the running sum of ``delta_time``
    over all rows of that light curve. The figure is written to
    ``settings.explore_dir / "sample_lightcurves.png"`` and then closed.

    Args:
        df (pandas.DataFrame): dataframe holding the data, indexed by SNID,
            with columns ``FLT``, ``FLUXCAL``, ``FLUXCALERR``, ``delta_time``
        SNIDs (np.array or list): holds lightcurve ids
        settings (ExperimentSettings): controls experiment hyperparameters

    Raises:
        KeyError: if an id is missing from ``df`` or a row's ``FLT`` value is
            not in ``settings.list_filters``
    """
    n_rows, n_cols = 4, 4

    fig = plt.figure(figsize=(20, 10))
    try:
        plt.suptitle("Sample of SN Ia light curves")
        gs = gridspec.GridSpec(n_rows, n_cols, hspace=0.4)

        # Slice so we never index past the end of SNIDs or past the grid
        for i, SNID in enumerate(SNIDs[: n_rows * n_cols]):
            ax = plt.subplot(gs[i])
            # A list indexer always yields a DataFrame, even when the light
            # curve has a single row (a scalar indexer would give a Series)
            df_temp = df.loc[[SNID]]

            # Prepare plotting data in a dict
            d = {
                flt: {"FLUXCAL": [], "FLUXCALERR": [], "MJD": []}
                for flt in settings.list_filters
            }

            # Time is cumulative over every row, whatever its filter
            current_time = 0
            rows = zip(
                df_temp.FLT.values,
                df_temp.FLUXCAL.values,
                df_temp.FLUXCALERR.values,
                df_temp.delta_time.values,
            )
            for flt, flux, fluxerr, delta_time in rows:
                series = d[flt]
                series["FLUXCAL"].append(flux)
                series["FLUXCALERR"].append(fluxerr)
                current_time += delta_time
                series["MJD"].append(current_time)

            for flt, series in d.items():
                # Only plot a time series if it's non empty
                if series["MJD"]:
                    ax.errorbar(
                        series["MJD"],
                        series["FLUXCAL"],
                        yerr=series["FLUXCALERR"],
                        label=f"Filter {flt}",
                    )

            ax.set_title(SNID, fontsize=18)
            ax.set_aspect("auto")
            ax.legend(loc="best")

        plt.savefig(Path(settings.explore_dir) / "sample_lightcurves.png")
    finally:
        # Release the figure so repeated calls do not accumulate open figures
        plt.close(fig)
def plot_random_preprocessed_lightcurves(settings, SNIDs):
    """Plot the light curves listed in SNIDs from the preprocessed pickles

    Every ``*_PHOT.pickle`` file found in ``settings.preprocessed_dir`` is
    loaded, the frames are concatenated and indexed by ``SNID``, and the
    result is handed to ``plot_lightcurves``.

    Args:
        settings (ExperimentSettings): controls experiment hyperparameters
        SNIDs (list): list of SN lightcurve IDs to plot
    """
    pickle_pattern = os.path.join(settings.preprocessed_dir, "*_PHOT.pickle")
    frames = [pd.read_pickle(path) for path in glob.glob(pickle_pattern)]
    lightcurves = pd.concat(frames).set_index("SNID")

    # Plot and save
    plot_lightcurves(lightcurves, SNIDs, settings)
def plot_lightcurves_from_hdf5(settings, SNID_idxs):
    """Plot lightcurves specified by SNID_idxs from the HDF5 database

    One panel of a 4x4 grid is drawn per index (at most 16). For each light
    curve the ``data`` entry is reshaped to one row per time step and one
    column per name in the ``features`` dataset. Columns not listed in
    ``non_filter_columns`` are treated as filter indicator columns: for each
    row, the indicator column holding the largest value names the filters
    present, each character of that column name being taken as one filter.
    Every filter of ``settings.list_filters`` that is present is drawn as an
    error-bar series against the cumulative ``delta_time``, and a dashed
    vertical line marks ``PEAKMJDNORM``. The figure is written to
    ``settings.explore_dir / "sample_lightcurves_from_hdf5.png"`` and closed.

    Args:
        settings (ExperimentSettings): controls experiment hyperparameters
        SNID_idxs (list): list of SN lightcurve index to plot
    """
    n_rows, n_cols = 4, 4

    # Feature columns that are NOT filter indicator columns
    non_filter_columns = {
        "FLUXCAL_g",
        "FLUXCAL_i",
        "FLUXCAL_r",
        "FLUXCAL_z",
        "FLUXCALERR_g",
        "FLUXCALERR_i",
        "FLUXCALERR_r",
        "FLUXCALERR_z",
        "delta_time",
        "HOSTGAL_PHOTOZ",
        "HOSTGAL_PHOTOZ_ERR",
        "HOSTGAL_SPECZ",
        "HOSTGAL_SPECZ_ERR",
    }

    with h5py.File(settings.hdf5_file_name, "r") as hf:
        features = hf["features"][:].astype(str)
        n_features = len(features)
        # Same for every light curve, so computed once
        filter_columns = [str(c) for c in features if c not in non_filter_columns]

        fig = plt.figure(figsize=(20, 10))
        try:
            gs = gridspec.GridSpec(n_rows, n_cols, hspace=0.4)

            # Slice so we never index past the capacity of the grid
            for idx, SNID_idx in enumerate(SNID_idxs[: n_rows * n_cols]):
                ax = plt.subplot(gs[idx])

                SNID = hf["SNID"][SNID_idx]
                PEAKMJDNORM = hf["PEAKMJDNORM"][SNID_idx]
                typ = hf[settings.sntype_var][SNID_idx]
                typ = settings.sntypes[str(typ)]

                data = hf["data"][SNID_idx].reshape(-1, n_features)
                df = pd.DataFrame(data, columns=features)

                # Per row: name of the indicator column with the largest value
                present_filters = df[filter_columns].transpose().idxmax().values
                list_present_filters = [set(f) for f in present_filters]

                # Shared by all filters of this light curve
                arr_time_all = df["delta_time"].cumsum().values

                max_y = -float("Inf")
                min_y = float("Inf")

                for FLT in settings.list_filters:
                    mask = np.array(
                        [FLT in present for present in list_present_filters],
                        dtype=bool,
                    )
                    if not mask.any():
                        continue
                    arr_flux = df[f"FLUXCAL_{FLT}"].values[mask]
                    arr_fluxerr = df[f"FLUXCALERR_{FLT}"].values[mask]
                    arr_time = arr_time_all[mask]
                    ax.errorbar(
                        arr_time, arr_flux, yerr=arr_fluxerr, label=f"Filter {FLT}"
                    )

                    max_y = max(max_y, np.max(arr_flux))
                    min_y = min(min_y, np.min(arr_flux))

                # Skip the peak marker when nothing was plotted (bounds would
                # still be +/-inf) or when the flux bounds are not finite
                if np.isfinite(min_y) and np.isfinite(max_y):
                    ax.plot(
                        [PEAKMJDNORM, PEAKMJDNORM],
                        [min_y, max_y],
                        color="k",
                        linestyle="--",
                    )

                # Accept both bytes and already-decoded ids
                snid_label = SNID.decode("utf8") if isinstance(SNID, bytes) else SNID
                ax.set_title(f"{snid_label} -- {typ}", fontsize=18)
                ax.set_aspect("auto")
                ax.legend(loc="best")

            plt.savefig(
                Path(settings.explore_dir) / "sample_lightcurves_from_hdf5.png"
            )
        finally:
            # Release the figure so repeated calls do not accumulate figures
            plt.close(fig)
def visualize(settings):
    """Plot a random subset of lightcurves

    Two figures are produced for the same randomly drawn light curves: one
    built from the preprocessed data and one built from the HDF5 database.
    The two plots should show the same data.

    Args:
        settings (ExperimentSettings): controls experiment hyperparameters
    """
    # Make sure the data has been created before trying to read it
    settings.check_data_exists()

    # Re-seed NumPy's global random generator (no explicit seed given)
    np.random.seed()

    with h5py.File(settings.hdf5_file_name, "r") as hf:
        n_lightcurves = hf["SNID"].shape[0]
        SNID_idxs = np.random.permutation(n_lightcurves)[:16]
        selected = hf["SNID"][:][SNID_idxs]
        # Rebuild the array from its elements before casting to str
        SNIDs = list(np.array(list(selected)).astype(str))

    plot_random_preprocessed_lightcurves(settings, SNIDs)
    plot_lightcurves_from_hdf5(settings, SNID_idxs)