Source code for candle.viz_utils

from pathlib import Path

import matplotlib as mpl

mpl.use("Agg")
import matplotlib.pyplot as plt
import numpy as np


def plot_history(out, history, metric="loss", val=True, title=None, width=8, height=6):
    """Plot the per-epoch curve of one training metric and save it as a PNG.

    The figure is written to ``"<out>.plot.<metric>.png"``.

    :param out: Prefix used to build the output file name.
    :param history: Object whose ``history`` attribute maps metric names to
        sequences of values (presumably a Keras ``History`` object -- confirm
        against callers).
    :param string metric: Key of the training metric to plot.
    :param bool val: If true, the ``"val_<metric>"`` series is plotted as well.
    :param string title: Figure title. Defaults to ``"model <metric>"``.
    :param width: Figure width, forwarded to ``figsize``.
    :param height: Figure height, forwarded to ``figsize``.
    """
    records = history.history
    heading = title or "model {}".format(metric)
    legend_entries = ["train_{}".format(metric)]

    plt.figure(figsize=(width, height))
    plt.plot(records[metric], marker="o")
    if val:
        # Validation curve is drawn second so legend order matches plot order
        plt.plot(records["val_{}".format(metric)], marker="d")
        legend_entries.append("val_{}".format(metric))

    plt.title(heading)
    plt.ylabel(metric)
    plt.xlabel("epoch")
    plt.legend(legend_entries, loc="upper center")

    plt.savefig("{}.plot.{}.png".format(out, metric), bbox_inches="tight")
    plt.close()
def plot_scatter(data, classes, out, width=10, height=8):
    """Save a 2D scatter plot of ``data`` colored by ``classes``.

    The first two columns of ``data`` are used as x and y coordinates. The
    figure, including a colorbar, is written to ``"<out>.png"``.

    :param data: 2D array; ``data[:, 0]`` and ``data[:, 1]`` are plotted.
    :param classes: Values forwarded to ``plt.scatter`` as ``c`` (one per point).
    :param out: Prefix of the output file name.
    :param width: Figure width, forwarded to ``figsize``.
    :param height: Figure height, forwarded to ``figsize``.
    """
    # plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
    # pyplot.get_cmap performs the same lookup and exists in old and new
    # releases alike.
    cmap = plt.get_cmap("gist_rainbow")

    plt.figure(figsize=(width, height))
    plt.scatter(
        data[:, 0],
        data[:, 1],
        c=classes,
        cmap=cmap,
        lw=0.5,
        edgecolor="black",
        alpha=0.7,
    )
    plt.colorbar()

    png = "{}.png".format(out)
    plt.savefig(png, bbox_inches="tight")
    plt.close()
def plot_error(y_true, y_pred, batch, file_ext, file_pre="output_dir", subsample=1000):
    """Save an error histogram and a measured-vs-predicted scatter plot.

    Nothing is done unless ``batch`` is a multiple of 10. Otherwise both
    arrays are optionally subsampled (without replacement), scaled by 100,
    and two PNG files are written:

    * ``<file_pre>.histogram<file_ext>.b<batch>.png`` -- histogram of
      ``y_pred - y_true``; for ``batch == 0`` a "Random" baseline built from
      a shuffled copy of ``y_true`` is overlaid.
    * ``<file_pre>.diff<file_ext>.b<batch>.png`` -- scatter of predicted vs.
      measured values with a dashed identity line.

    :param y_true: Array of observed values (must support array indexing).
    :param y_pred: Array of predicted values, same length as ``y_true``.
    :param batch: Batch/epoch counter used for gating and in file names.
    :param string file_ext: Text inserted into the file names.
    :param string file_pre: Prefix of the output file names.
    :param subsample: Maximum number of samples to plot; falsy disables it.
    """
    if batch % 10 != 0:
        return

    n_samples = len(y_true)
    if subsample and subsample < n_samples:
        picked = np.random.choice(n_samples, size=subsample, replace=False)
        y_true, y_pred = y_true[picked], y_pred[picked]

    # Rescale into new arrays; the caller's data is left untouched
    y_true = y_true * 100
    y_pred = y_pred * 100
    suffix = file_ext + ".b" + str(batch) + ".png"

    # Histogram of errors
    edges = np.linspace(-200, 200, 100)
    if batch == 0:
        shuffled = np.random.permutation(y_true)
        plt.hist(shuffled - y_true, edges, alpha=0.5, label="Random")
    plt.hist(y_pred - y_true, edges, alpha=0.3, label="Epoch {}".format(batch + 1))
    plt.title("Histogram of errors in percentage growth")
    plt.legend(loc="upper right")
    plt.savefig(file_pre + ".histogram" + suffix)
    plt.close()

    # Measured vs. predicted values
    fig, ax = plt.subplots()
    plt.grid("on")
    ax.scatter(y_true, y_pred, color="red", s=10)
    low, high = y_true.min(), y_true.max()
    ax.plot([low, high], [low, high], "k--", lw=4)
    ax.set_xlabel("Measured")
    ax.set_ylabel("Predicted")
    plt.savefig(file_pre + ".diff" + suffix)
    plt.close()
def plot_array(nparray, xlabel, ylabel, title, fname):
    """Draw ``nparray`` as a line plot and save the figure to ``fname``.

    :param nparray: Data forwarded to the line plot.
    :param string xlabel: Label of the x axis.
    :param string ylabel: Label of the y axis.
    :param string title: Title of the figure.
    :param fname: Path of the image file to write.
    """
    fig = plt.figure()
    axes = fig.gca()

    axes.plot(nparray, lw=3.0)
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    axes.set_title(title)

    fig.savefig(fname, bbox_inches="tight")
    plt.close(fig)
# UTILS for UQ / CALIBRATION VISUALIZATION
from matplotlib.colors import LogNorm
def plot_density_observed_vs_predicted(Ytest, Ypred, pred_name=None, figprefix=None):
    """Functionality to plot a 2D histogram of the distribution of observed
    (ground truth) values vs. predicted values. The plot generated is stored
    in a png file.

    :param numpy array Ytest: Array with (true) observed values.
    :param numpy array Ypred: Array with predicted values.
    :param string pred_name: Name of data column or quantity predicted
        (e.g. growth, AUC, etc.). ``None`` is treated as an empty string.
    :param string figprefix: String to prefix the filename to store the figure
        generated. A '_density_predictions.png' string will be appended to the
        figprefix given. ``None`` is treated as an empty prefix.
    """
    # Both arguments default to None, which used to raise TypeError as soon as
    # they were concatenated with a string.
    pred_name = "" if pred_name is None else pred_name
    figprefix = "" if figprefix is None else figprefix
    figname = "{}_density_predictions.png".format(figprefix)

    xbins = 51
    obs_min, obs_max = Ytest.min(), Ytest.max()

    plt.figure(figsize=(24, 18))
    ax = plt.gca()
    # NOTE: plt.rc updates the global rcParams, so it also affects figures
    # created later; kept for backward compatibility. This figure's tick label
    # sizes are set explicitly by the setp calls below.
    plt.rc("xtick", labelsize=16)
    # Dashed identity line: perfect predictions fall on it
    ax.plot([obs_min, obs_max], [obs_min, obs_max], "r--", lw=4.0)
    plt.hist2d(Ytest, Ypred, bins=xbins, norm=LogNorm())
    cb = plt.colorbar()
    ax.set_xlabel("Observed " + pred_name, fontsize=38, labelpad=15.0)
    ax.set_ylabel("Mean " + pred_name + " Predicted", fontsize=38, labelpad=15.0)
    # NOTE(review): the 0.98 / 1.02 factors only widen the view for positive
    # bounds; with a negative minimum the lower limit moves inward.
    ax.axis([obs_min * 0.98, obs_max * 1.02, obs_min * 0.98, obs_max * 1.02])
    plt.setp(ax.get_xticklabels(), fontsize=32)
    plt.setp(ax.get_yticklabels(), fontsize=32)
    # Resize the colorbar labels without freezing their text, which
    # set_yticklabels(get_yticklabels()) did
    cb.ax.tick_params(labelsize=28)
    plt.grid(True)
    plt.savefig(figname, bbox_inches="tight")
    plt.close()
    print("Generated plot: ", figname)
def plot_2d_density_sigma_vs_error(sigma, yerror, method=None, figprefix=None):
    """Functionality to plot a 2D histogram of the distribution of the
    standard deviations computed for the predictions vs. the computed errors
    (i.e. values of observed - predicted). The plot generated is stored in a
    png file.

    :param numpy array sigma: Array with standard deviations computed.
    :param numpy array yerror: Array with errors computed (observed - predicted).
    :param string method: Method used to compute the standard deviations
        (i.e. dropout, heteroscedastic, etc.). ``None`` is treated as an
        empty string.
    :param string figprefix: String to prefix the filename to store the figure
        generated. A '_density_std_error.png' string will be appended to the
        figprefix given. ``None`` is treated as an empty prefix.
    """
    # Both arguments default to None, which used to raise TypeError as soon as
    # they were concatenated with a string.
    method = "" if method is None else method
    figprefix = "" if figprefix is None else figprefix
    figname = "{}_density_std_error.png".format(figprefix)

    xbins = 51
    ybins = 31

    plt.figure(figsize=(24, 18))
    ax = plt.gca()
    # NOTE: plt.rc updates the global rcParams, so it also affects figures
    # created later; kept for backward compatibility.
    plt.rc("xtick", labelsize=16)
    plt.hist2d(sigma, yerror, bins=[xbins, ybins], norm=LogNorm())
    cb = plt.colorbar()
    ax.set_xlabel("Standard Deviation (" + method + ")", fontsize=38, labelpad=15.0)
    ax.set_ylabel("Error: Observed - Mean Predicted", fontsize=38, labelpad=15.0)
    # The y range is symmetric around zero, sized by the largest error
    err_max = yerror.max()
    ax.axis([sigma.min() * 0.98, sigma.max() * 1.02, -err_max, err_max])
    plt.setp(ax.get_xticklabels(), fontsize=32)
    plt.setp(ax.get_yticklabels(), fontsize=32)
    # Resize the colorbar labels without freezing their text, which
    # set_yticklabels(get_yticklabels()) did
    cb.ax.tick_params(labelsize=28)
    plt.grid(True)
    plt.savefig(figname, bbox_inches="tight")
    plt.close()
    print("Generated plot: ", figname)
def plot_histogram_error_per_sigma(sigma, yerror, method=None, figprefix=None):
    """Functionality to plot 1D histograms of the distribution of computed
    errors (i.e. values of observed - predicted) observed for specific values
    of standard deviations computed. The range of standard deviations is split
    into xbins bins. Of the first four bins (smallest standard deviations),
    the normalized error distributions of the first, third and fourth are
    plotted; the second bin is skipped. The plot generated is stored in a png
    file.

    :param numpy array sigma: Array with standard deviations computed.
    :param numpy array yerror: Array with errors computed (observed - predicted).
    :param string method: Method used to compute the standard deviations
        (i.e. dropout, heteroscedastic, etc.). ``None`` is treated as an
        empty string.
    :param string figprefix: String to prefix the filename to store the figure
        generated. A '_histogram_error_per_std.png' string will be appended to
        the figprefix given. ``None`` is treated as an empty prefix.
    """
    # Both arguments default to None, which used to raise TypeError as soon as
    # they were concatenated with a string.
    method = "" if method is None else method
    figprefix = "" if figprefix is None else figprefix
    figname = "{}_histogram_error_per_std.png".format(figprefix)

    xbins = 21
    ybins = 31
    # Only the counts and edges are needed. np.histogram2d yields the same
    # values as plt.hist2d without drawing onto an implicit figure that was
    # never closed.
    H, xedges, yedges = np.histogram2d(sigma, yerror, bins=[xbins, ybins])

    plt.figure(figsize=(18, 24))
    for ii in range(4):
        if ii == 1:
            # The second bin is skipped, as in the original code
            # (NOTE(review): reason is not documented).
            continue
        # Label each curve with its bin center directly, so legend entries
        # cannot drift from the lines that are actually drawn.
        plt.plot(
            yedges[0 : H.shape[1]],
            H[ii, :] / np.sum(H[ii, :]),
            marker="o",
            markersize=12,
            lw=6.0,
            label=str((xedges[ii] + xedges[ii + 1]) / 2),
        )
    plt.legend(fontsize=28)

    ax = plt.gca()
    plt.title("Error Dist. per Standard Deviation for " + method, fontsize=40)
    ax.set_xlabel("Error: Observed - Mean Predicted", fontsize=38, labelpad=15.0)
    ax.set_ylabel("Density", fontsize=38, labelpad=15.0)
    plt.setp(ax.get_xticklabels(), fontsize=32)
    plt.setp(ax.get_yticklabels(), fontsize=32)
    plt.grid(True)
    plt.savefig(figname, bbox_inches="tight")
    plt.close()
    print("Generated plot: ", figname)
def plot_decile_predictions(
    Ypred, Ypred_Lp, Ypred_Hp, decile_list, pred_name=None, figprefix=None
):
    """Functionality to plot the mean of the deciles predicted. Samples are
    ordered by increasing median prediction. The plot generated is stored in
    a png file.

    :param numpy array Ypred: Array with median predicted values.
    :param numpy array Ypred_Lp: Array with low decile predicted values.
    :param numpy array Ypred_Hp: Array with high decile predicted values.
    :param List decile_list: List of deciles predicted (e.g. '1st', '9th',
        etc.), used as legend entries in the order median, low, high.
    :param string pred_name: Name of data column or quantity predicted
        (e.g. growth, AUC, etc.). ``None`` is treated as an empty string.
    :param string figprefix: String to prefix the filename to store the figure
        generated. A '_decile_predictions.png' string will be appended to the
        figprefix given. ``None`` is treated as an empty prefix.
    """
    # Both arguments default to None, which used to raise TypeError as soon as
    # they were concatenated with a string.
    pred_name = "" if pred_name is None else pred_name
    figprefix = "" if figprefix is None else figprefix
    figname = "{}_decile_predictions.png".format(figprefix)

    # Sort every series by the median prediction so the curves are monotone in x
    index_ = np.argsort(Ypred)
    positions = np.arange(index_.shape[0])

    plt.figure(figsize=(24, 18))
    plt.scatter(positions, Ypred[index_])
    plt.scatter(positions, Ypred_Lp[index_])
    plt.scatter(positions, Ypred_Hp[index_])
    plt.legend(decile_list, fontsize=28)
    plt.xlabel("Index", fontsize=38.0)
    plt.ylabel(pred_name, fontsize=38.0)
    plt.title("Predicted " + pred_name + " Deciles", fontsize=40)
    plt.grid()
    ax = plt.gca()
    plt.setp(ax.get_xticklabels(), fontsize=32)
    plt.setp(ax.get_yticklabels(), fontsize=32)
    plt.savefig(figname, bbox_inches="tight")
    plt.close()
    print("Generated plot: ", figname)
def _save_calibration_curve(mean_sigma, error, xgrid, ygrid, curve_fmt, method, figname):
    """Plot the (sigma, error) samples plus one interpolated curve and save to figname."""
    fig = plt.figure(figsize=(24, 18))
    ax = plt.gca()
    ax.plot(mean_sigma, error, "kx")
    ax.plot(xgrid, ygrid, curve_fmt, ms=20)
    plt.legend(["True", "Cubic Spline"], fontsize=28)
    plt.xlabel("Standard Deviation Predicted (" + method + ")", fontsize=38.0)
    plt.ylabel("Error: ABS Observed - Mean Predicted", fontsize=38.0)
    plt.title("Calibration (by Interpolation)", fontsize=40)
    plt.setp(ax.get_xticklabels(), fontsize=32)
    plt.setp(ax.get_yticklabels(), fontsize=32)
    plt.grid()
    fig.tight_layout()
    plt.savefig(figname, bbox_inches="tight")
    plt.close()
    print("Generated plot: ", figname)


def plot_calibration_interpolation(
    mean_sigma, error, splineobj1, splineobj2, method="", figprefix=None, steps=False
):
    """Functionality to plot empirical calibration curves estimated by
    interpolation of the computed standard deviations and errors. Since the
    estimations are very noisy, two levels of smoothing are used. Both can be
    plotted independently, if requested. The plot(s) generated is(are) stored
    in png file(s).

    :param numpy array mean_sigma: Array with the mean standard deviations
        computed in inference.
    :param numpy array error: Array with the errors computed from the means
        predicted in inference.
    :param scipy.interpolate python object splineobj1: A python object from
        scipy.interpolate that computes a cubic Hermite spline
        (PchipInterpolator) to express the interpolation after the first
        smoothing. This spline is a partial result generated during the
        empirical calibration procedure. It is only evaluated when ``steps``
        is true.
    :param scipy.interpolate python object splineobj2: A python object from
        scipy.interpolate that computes a cubic Hermite spline
        (PchipInterpolator) to express the mapping from standard deviation to
        error. This spline is generated for interpolating the predictions
        after a process of smoothing-interpolation-smoothing computed during
        the empirical calibration procedure.
    :param string method: Method used to compute the standard deviations
        (i.e. dropout, heteroscedastic, etc.).
    :param string figprefix: String to prefix the filename to store the figure
        generated. A '_empirical_calibration_interpolation.png' string will be
        appended to the figprefix given. ``None`` is treated as an empty
        prefix.
    :param bool steps: Besides the complete empirical calibration plot, also
        generate a partial plot showing only the spline obtained after the
        first smoothing level; it is stored with the suffix
        '_empirical_calibration_interp_smooth1.png'.
    """
    # figprefix defaults to None, which used to raise TypeError on concatenation
    figprefix = "" if figprefix is None else figprefix

    # Evaluate the splines on a regular grid spanning the observed sigmas
    xp23 = np.linspace(np.min(mean_sigma), np.max(mean_sigma), 200)
    yp23 = splineobj2(xp23)

    if steps:
        # Intermediate result: spline after the first smoothing only
        _save_calibration_curve(
            mean_sigma,
            error,
            xp23,
            splineobj1(xp23),
            "gx",
            method,
            "{}_empirical_calibration_interp_smooth1.png".format(figprefix),
        )

    # Final calibration curve
    _save_calibration_curve(
        mean_sigma,
        error,
        xp23,
        yp23,
        "rx",
        method,
        "{}_empirical_calibration_interpolation.png".format(figprefix),
    )
def plot_calibrated_std(
    y_test, y_pred, std_calibrated, thresC, pred_name=None, figprefix=None
):
    """Functionality to plot values in testing set after calibration. Samples
    are ordered by increasing prediction. An estimation of the
    lower-confidence samples is made. The plot generated is stored in a png
    file.

    :param numpy array y_test: Array with (true) observed values.
    :param numpy array y_pred: Array with predicted values.
    :param numpy array std_calibrated: Array with standard deviation values
        after calibration.
    :param float thresC: Threshold to label low confidence predictions (low
        confidence predictions are the ones with std > thresC).
    :param string pred_name: Name of data column or quantity predicted
        (e.g. growth, AUC, etc.). ``None`` is treated as an empty string.
    :param string figprefix: String to prefix the filename to store the figure
        generated. A '_calibrated.png' string will be appended to the
        figprefix given. ``None`` is treated as an empty prefix.
    """
    # Both arguments default to None, which used to raise TypeError as soon as
    # they were concatenated with a string.
    pred_name = "" if pred_name is None else pred_name
    figprefix = "" if figprefix is None else figprefix
    figname = "{}_calibrated.png".format(figprefix)

    N = y_test.shape[0]
    index = np.argsort(y_pred)
    x = np.arange(N)

    # Everything is drawn against the rank in sorted-prediction order, so all
    # series are reordered up front.
    y_test_sorted = y_test[index]
    y_pred_sorted = y_pred[index]
    std_sorted = std_calibrated[index]
    # The mask must be taken in the same sorted order. Masking in original
    # sample order placed the low-confidence markers at the wrong x positions.
    low_conf = std_sorted > thresC

    alphafill = 0.7 if N > 2000 else 0.5
    scale = 120

    fig = plt.figure(figsize=(24, 18))
    ax = plt.gca()
    ax.scatter(x, y_test_sorted, color="red", s=scale, alpha=0.5)
    # Shaded band of +/- 1.28 calibrated standard deviations
    plt.fill_between(
        x,
        y_pred_sorted - 1.28 * std_sorted,
        y_pred_sorted + 1.28 * std_sorted,
        color="gray",
        alpha=alphafill,
    )
    plt.scatter(x, y_pred_sorted, color="orange", s=scale)
    plt.scatter(x[low_conf], y_test_sorted[low_conf], color="green", s=scale, alpha=0.5)
    plt.legend(["True", "1.28 Std", "Pred", "Low conf"], fontsize=28)
    plt.xlabel("Index", fontsize=38.0)
    plt.ylabel(pred_name + " Predicted", fontsize=38.0)
    plt.title("Calibrated Standard Deviation", fontsize=40)
    plt.setp(ax.get_xticklabels(), fontsize=32)
    plt.setp(ax.get_yticklabels(), fontsize=32)
    plt.grid()
    fig.tight_layout()
    plt.savefig(figname, bbox_inches="tight")
    plt.close()
    print("Generated plot: ", figname)
def plot_contamination(
    y_true, y_pred, sigma, T=None, thresC=0.1, pred_name=None, figprefix=None
):
    """Functionality to plot results for the contamination model. This
    includes the latent variables T if they are given (i.e. if the results
    provided correspond to training results). Global parameters for the normal
    distribution are used for shading 80% confidence interval. If results for
    training (i.e. T available), samples determined to be outliers (i.e.
    samples whose probability of membership to the heavy tailed distribution
    (Cauchy) is greater than the threshold given) are highlighted. The plot(s)
    generated is(are) stored in png file(s).

    :param numpy array y_true: Array with observed values.
    :param numpy array y_pred: Array with predicted values.
    :param float sigma: Standard deviation of the normal distribution.
    :param numpy array T: Array with latent variables (i.e. membership to
        normal and heavy-tailed distributions, read from columns 0 and 1).
        In testing T is not available and should be left as None.
    :param float thresC: Threshold to label outliers (outliers are the ones
        with probability of membership to heavy-tailed distribution,
        i.e. T[:,1] > thresC).
    :param string pred_name: Name of data column or quantity predicted
        (e.g. growth, AUC, etc.). ``None`` is treated as an empty string.
    :param string figprefix: String to prefix the filename to store the
        figures generated. '_outTrain_contamination.png' (T given) or
        '_outTest_contamination.png' (T is None) will be appended to the
        figprefix given; with T, a second figure is stored with the suffix
        '_T_contamination.png'. ``None`` is treated as an empty prefix.
    """
    # Both arguments default to None, which used to raise TypeError as soon as
    # they were concatenated with a string.
    pred_name = "" if pred_name is None else pred_name
    figprefix = "" if figprefix is None else figprefix

    N = y_true.shape[0]
    index = np.argsort(y_pred)
    x = np.arange(N)

    has_latent = T is not None
    if has_latent:
        indexG = T[:, 0] > (1.0 - thresC)
        indexC = T[:, 1] > thresC
        # The band is only drawn for samples assigned to the normal
        # component; elsewhere its width collapses to zero.
        ss = sigma * indexG
        prefig = "_outTrain"
    else:
        ss = sigma
        prefig = "_outTest"
    auxGh = y_pred + 1.28 * ss
    auxGl = y_pred - 1.28 * ss

    # Everything is drawn against the rank in sorted-prediction order
    y_true_sorted = y_true[index]

    # Plotting Outliers
    scale = 120
    figname = "{}{}_contamination.png".format(figprefix, prefig)
    fig = plt.figure(figsize=(24, 18))
    ax = plt.gca()
    ax.scatter(x, y_true_sorted, color="red", s=scale)
    if has_latent:
        # The outlier mask must follow the same sorted order as x. Masking in
        # original sample order placed the markers at the wrong x positions.
        outlier_sorted = indexC[index]
        plt.scatter(x[outlier_sorted], y_true_sorted[outlier_sorted], color="green", s=scale)
    plt.scatter(x, y_pred[index], color="orange", s=scale)
    plt.fill_between(x, auxGl[index], auxGh[index], color="gray", alpha=0.5)
    if has_latent:
        plt.legend(["True", "Outlier", "Pred", "1.28 Std"], fontsize=28)
    else:
        plt.legend(["True", "Pred", "1.28 Std"], fontsize=28)
    plt.xlabel("Index", fontsize=38.0)
    plt.ylabel(pred_name + " Predicted", fontsize=38.0)
    plt.title("Contamination Results", fontsize=40)
    plt.setp(ax.get_xticklabels(), fontsize=32)
    plt.setp(ax.get_yticklabels(), fontsize=32)
    plt.grid()
    fig.tight_layout()
    plt.savefig(figname, bbox_inches="tight")
    plt.close()
    print("Generated plot: ", figname)

    if has_latent:
        # Plotting Latent Variables vs error
        figname_latent = "{}_T_contamination.png".format(figprefix)
        error = np.abs(y_true - y_pred)
        fig = plt.figure(figsize=(24, 18))
        ax = plt.gca()
        ax.scatter(error, T[:, 0], color="blue", s=scale)
        ax.scatter(error, T[:, 1], color="orange", s=scale)
        plt.legend(["Normal", "Heavy-Tailed"], fontsize=28)
        plt.xlabel("ABS Error", fontsize=38.0)
        plt.ylabel("Membership Probability", fontsize=38.0)
        plt.title("Contamination: Latent Variables", fontsize=40)
        plt.setp(ax.get_xticklabels(), fontsize=32)
        plt.setp(ax.get_yticklabels(), fontsize=32)
        plt.grid()
        fig.tight_layout()
        plt.savefig(figname_latent, bbox_inches="tight")
        plt.close()
        print("Generated plot: ", figname_latent)
# Plot training and validation metrics together, generating one chart per metric
def plot_metrics(history, title=None, skip_ep=0, outdir=".", add_lr=False):
    """Plots keras training curves history.

    For every metric recorded both as ``<name>`` and ``val_<name>``, one
    figure with the training and validation curves is saved to
    ``<outdir>/<name>.png``. Nothing is written when no such pair exists or
    when no epochs were recorded.

    Args:
        history: object exposing ``history`` (dict of metric name -> list of
            per-epoch values) and ``epoch`` (list of epoch indices)
        title: optional title added to each figure
        skip_ep: number of epochs to skip when plotting metrics; ignored when
            it would skip every recorded epoch
        outdir: directory where the figures are stored (created if missing)
        add_lr: add curve of learning rate progression over epochs (read from
            the ``"lr"`` entry of the history, if present)
    """

    def capitalize_metric(met):
        return " ".join(s.capitalize() for s in met.split("_"))

    hh = history.history
    val_prefix = "val_"
    # Match on the "val_" prefix only. A plain substring test also caught
    # names such as "interval_loss" and then failed with KeyError. Validation
    # metrics without a training counterpart are skipped.
    pr_metrics = [
        m[len(val_prefix):]
        for m in hh
        if m.startswith(val_prefix) and m[len(val_prefix):] in hh
    ]
    if not pr_metrics:
        return

    epochs = np.asarray(history.epoch) + 1
    if len(epochs) <= skip_ep:
        skip_ep = 0
    eps = epochs[skip_ep:]
    if len(eps) == 0:
        # Nothing recorded: avoid min()/max() on empty sequences below
        return

    out_dir = Path(outdir)
    out_dir.mkdir(parents=True, exist_ok=True)

    for metric_name in pr_metrics:
        metric_name_val = val_prefix + metric_name

        y_tr = hh[metric_name][skip_ep:]
        y_vl = hh[metric_name_val][skip_ep:]

        # Start figure
        fig, ax1 = plt.subplots()

        # Plot metrics
        ax1.plot(
            eps,
            y_tr,
            color="b",
            marker=".",
            linestyle="-",
            linewidth=1,
            alpha=0.6,
            label=capitalize_metric(metric_name),
        )
        ax1.plot(
            eps,
            y_vl,
            color="r",
            marker=".",
            linestyle="--",
            linewidth=1,
            alpha=0.6,
            label=capitalize_metric(metric_name_val),
        )
        ax1.set_xlabel("Epoch")
        ax1.set_ylabel(capitalize_metric(metric_name))
        ax1.set_xlim([eps.min() - 1, eps.max() + 1])

        # y-limits: data range plus a 10% margin. NaN/inf values (e.g. a
        # diverged loss) are ignored because axis limits must be finite.
        values = np.concatenate(
            [np.asarray(y_tr, dtype=float), np.asarray(y_vl, dtype=float)]
        )
        finite = values[np.isfinite(values)]
        if finite.size:
            ymin, ymax = finite.min(), finite.max()
            lim = (ymax - ymin) * 0.1
            ax1.set_ylim([ymin - lim, ymax + lim])
        ax1.tick_params("y", colors="k")

        # Add learning rate on a secondary, log-scaled axis
        if add_lr and "lr" in hh:
            ax2 = ax1.twinx()
            ax2.plot(
                eps,
                hh["lr"][skip_ep:],
                color="g",
                marker=".",
                linestyle=":",
                linewidth=1,
                alpha=0.6,
                markersize=5,
                label="LR",
            )
            ax2.set_ylabel("Learning rate", color="g", fontsize=12)
            ax2.set_yscale("log")
            ax2.tick_params("y", colors="g")

        ax1.grid(True)
        legend = ax1.legend(loc="best", prop={"size": 10})
        frame = legend.get_frame()
        frame.set_facecolor("0.95")
        if title is not None:
            plt.title(title)

        figpath = out_dir / (metric_name + ".png")
        plt.savefig(figpath, bbox_inches="tight")
        plt.close(fig)