# Source code for metawards.analysis._summary_plot


# Names exported by a star-import of this module. Helper functions that are
# not listed here (e.g. ``get_color``) remain importable by explicit name.
__all__ = ["import_graphics_modules",
           "save_summary_plots",
           "create_overview_plot",
           "create_average_plot",
           "create_demographics_plot"]


def import_graphics_modules(verbose=False):
    """Import pandas and matplotlib in a safe way, giving a good
    error message if something goes wrong.

    Parameters
    ----------
    verbose: bool
        Whether or not to print to the screen to signal progress...

    Returns
    -------
    (pd, plt)
        The pandas (pd) and matplotlib.pyplot (plt) modules

    Raises
    ------
    ImportError
        If pandas or matplotlib.pyplot cannot be imported. Installation
        hints are printed first, and the original import failure is
        attached as the ``__cause__`` of the raised exception.
    """
    if verbose:
        print("Importing graphics modules...")

    try:
        import pandas as pd
        import matplotlib.pyplot as plt
    except ImportError as err:
        print("You must have pandas and matplotlib installed to run "
              "metawards-plot")
        print("Install using either `pip install pandas` if you are using")
        print("pip, or 'conda install pandas' if you are using conda, ")
        print("or by running 'metawards-install --optional'")

        # Chain explicitly so the traceback shows which module was
        # actually missing rather than only this generic message.
        raise ImportError("Cannot produce the plot as pandas and matplotlib "
                          "are not installed.") from err

    return (pd, plt)
def create_overview_plot(df, output_dir: str = None,
                         format: str = "jpg", dpi: int = 150,
                         align_axes: bool = True,
                         verbose: bool = True):
    """Create a summary plot of the result.csv data held in the
    passed pandas dataframe. This returns the figure for you
    to save if desired (or just call ``plt.show()`` to show
    it in Jupyter)

    If the dataframe contains multiple fingerprints, then this
    will return a dictionary of figures, one for each fingerprint,
    indexed by fingerprint

    Parameters
    ----------
    df : Pandas Dataframe
        The pandas dataframe containing the data from results.csv.bz2.
        If it has no "fingerprint" column then a single figure is drawn
        with one line per value of the "demographic" column.
    output_dir: str
        The name of the directory in which to draw the graphs. If this
        is set then the graphs are written to files as they are
        generated and the filenames of the figures are returned.
        This is necessary when the number of graphs to draw is high and
        you don't want to waste too much memory
    format: str
        Format to save the figures in if output_dir is supplied. A
        request for "jpg" falls back to "png" if 'pillow' (PIL) cannot
        be imported.
    dpi: int
        dpi (dots per inch) resolution to save the figures with if
        a bitmap format is used and output_dir is supplied
    align_axes: bool
        If true (default) then this will ensure that all of the plots
        for different fingerprints are put on the same axis scale
    verbose: bool
        Whether or not to print progress to the screen

    Returns
    -------
    fig
        The matplotlib figure containing the summary plot, or a
        dictionary of figures if there are multiple fingerprints,
        or the filename if output_dir was supplied, or a dictionary
        of multiple filenames indexed by fingerprint. None is returned
        if there was nothing to plot.
    """
    import os

    _, plt = import_graphics_modules()

    try:
        fingerprints = df["fingerprint"].unique()
        repeat = "repeat"
    except KeyError:
        # no fingerprints
        fingerprints = [None]
        repeat = "demographic"

    try:
        import PIL  # noqa - disable unused warning
    except ImportError:
        if format == "jpg":
            print(
                "WARNING: Missing 'pillow' package, defaulting to PNG format.")
            format = "png"

    columns = ["E", "I", "IW", "R"]
    nfigs = len(fingerprints)
    multiple = nfigs > 1
    share_axes = multiple and align_axes

    min_date = None
    max_date = None
    min_y = {}
    max_y = {}

    if share_axes:
        # First pass: find the axis limits that span every fingerprint
        for fingerprint in fingerprints:
            df2 = df[df["fingerprint"] == fingerprint]

            # the x-range does not depend on the column, so compute it
            # once per fingerprint rather than once per column
            min_d = df2["day"].min()
            max_d = df2["day"].max()

            if min_date is None:
                min_date = min_d
                max_date = max_d
            else:
                min_date = min(min_date, min_d)
                max_date = max(max_date, max_d)

            for column in columns:
                min_val = df2[column].min()
                max_val = df2[column].max()

                if column not in min_y:
                    min_y[column] = min_val
                    max_y[column] = max_val
                else:
                    min_y[column] = min(min_y[column], min_val)
                    max_y[column] = max(max_y[column], max_val)

    if multiple:
        # only needed to build the per-fingerprint titles
        from metawards import VariableSet

    figs = {}

    for fingerprint in fingerprints:
        if fingerprint is None:
            df2 = df
        else:
            df2 = df[df["fingerprint"] == fingerprint]

        if multiple:
            # the decoded values are the same for all four panels
            fvals, _rpt = VariableSet.extract_values(fingerprint)

        fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 10))

        for idx, column in enumerate(columns):
            # fill the 2x2 grid row by row
            row, col = divmod(idx, 2)

            ax = df2.pivot(index="date", columns=repeat,
                           values=column).plot.line(ax=axes[row][col])
            ax.tick_params('x', labelrotation=90)
            ax.get_legend().remove()
            ax.set_ylabel("Population")

            if share_axes:
                ax.set_xlim(min_date, max_date)
                # leave 10% headroom above the largest value
                ax.set_ylim(min_y[column], 1.1*max_y[column])

            if multiple:
                ax.set_title(f"{fvals} : {column}")
            else:
                ax.set_title(column)

        fig.tight_layout(pad=1)

        if output_dir:
            if nfigs == 1:
                filename = os.path.join(output_dir, f"overview.{format}")
            else:
                filename = os.path.join(output_dir,
                                        f"overview_{fingerprint}.{format}")

            if verbose:
                print(f"Saving figure {filename}")

            fig.savefig(filename, dpi=dpi)

            # close this specific figure (not just whichever one is
            # "current") so that memory is released as we go
            plt.close(fig)
            figs[fingerprint] = filename
        else:
            if verbose:
                print(f"Created the figure for {fingerprint}")

            figs[fingerprint] = fig

    if not figs:
        return None

    if len(figs) == 1:
        return next(iter(figs.values()))

    return figs
def create_average_plot(df, output_dir: str = None,
                        format: str = "jpg", dpi: int = 150,
                        align_axes: bool = True,
                        verbose: bool = True):
    """Create an average plot of the result.csv data held in the
    passed pandas dataframe. This returns the figure for you
    to save if desired (or just call ``plt.show()`` to show
    it in Jupyter)

    A figure is only drawn for a fingerprint that has more than one
    repeat of the model run. If no fingerprint has multiple repeats
    then nothing is drawn and None is returned.

    If the dataframe contains multiple fingerprints, then this
    will return a dictionary of figures, one for each fingerprint,
    indexed by fingerprint

    Parameters
    ----------
    df : Pandas Dataframe
        The pandas dataframe containing the data from results.csv.bz2.
        It must have "fingerprint", "repeat" and "date" columns.
    output_dir: str
        The name of the directory in which to draw the graphs. If this
        is set then the graphs are written to files as they are
        generated and the filenames of the figures are returned.
        This is necessary when the number of graphs to draw is high and
        you don't want to waste too much memory
    format: str
        Format to save the figures in if output_dir is supplied. A
        request for "jpg" falls back to "png" if 'pillow' (PIL) cannot
        be imported.
    dpi: int
        dpi (dots per inch) resolution to save the figures with if
        a bitmap format is used and output_dir is supplied
    align_axes: bool
        Accepted so that the signature matches the other plotting
        functions. It is currently not used by this function.
    verbose: bool
        Whether or not to print progress to the screen

    Returns
    -------
    fig
        The matplotlib figure containing the average plot, or a
        dictionary of figures if there are multiple fingerprints,
        or the filename if output_dir was supplied, or a dictionary
        of multiple filenames indexed by fingerprint
    """
    import os

    try:
        import PIL  # noqa - disable unused warning
    except ImportError:
        if format == "jpg":
            print(
                "WARNING: Missing 'pillow' package, defaulting to PNG format.")
            format = "png"

    columns = ["E", "I", "IW", "R"]

    fingerprints = df["fingerprint"].unique()
    nfigs = len(fingerprints)

    figs = {}

    # matplotlib is only imported once we know there is something to draw
    plt = None

    for fingerprint in fingerprints:
        df2 = df[df["fingerprint"] == fingerprint]

        nrepeats = len(df2["repeat"].unique())

        if nrepeats <= 1:
            # an average of a single run is meaningless
            continue

        if plt is None:
            _, plt = import_graphics_modules()

        fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 10))

        # Only aggregate the columns that are plotted. Aggregating the
        # whole frame fails on recent pandas because non-numeric columns
        # (such as the fingerprint) cannot be averaged.
        by_date = df2.groupby("date")[columns]
        mean_average = by_date.mean()
        stddev = by_date.std()

        for idx, column in enumerate(columns):
            # fill the 2x2 grid row by row
            row, col = divmod(idx, 2)

            ax = mean_average.plot.line(y=column, yerr=stddev[column],
                                        ax=axes[row][col])
            ax.tick_params('x', labelrotation=90)
            ax.get_legend().remove()
            ax.set_title(column)
            ax.set_ylabel("Population")

        fig.tight_layout(pad=1)

        if output_dir:
            if nfigs == 1:
                filename = os.path.join(output_dir, f"average.{format}")
            else:
                filename = os.path.join(output_dir,
                                        f"average_{fingerprint}.{format}")

            if verbose:
                print(f"Saving figure {filename}")

            fig.savefig(filename, dpi=dpi)

            # close this specific figure so memory is released as we go
            plt.close(fig)
            figs[fingerprint] = filename
        else:
            if verbose:
                print(f"Created the figure for {fingerprint}")

            figs[fingerprint] = fig

    if not figs:
        return None

    if len(figs) == 1:
        return next(iter(figs.values()))

    return figs
def get_color(name=None, idx=None):
    """Return a good color for the passed name or passed index (idx).

    The name is compared case-insensitively with surrounding whitespace
    removed. "overall" maps to "black", and a name that is itself one of
    the recognised colour names is returned (lower-cased). Otherwise the
    colour is picked from a fixed palette using idx, or a random
    (r, g, b) tuple is returned if idx is None or outside the palette.
    """
    key = str(name).strip().lower()

    if key == "overall":
        return "black"

    recognised = ["red", "blue", "green", "orange", "yellow",
                  "black", "white", "gray", "pink"]

    if key in recognised:
        return key

    palette = ["red", "blue", "green", "orange", "pink", "gray"]

    no_palette_entry = idx is None or idx < 0 or idx >= len(palette)

    if not no_palette_entry:
        return palette[int(idx)]

    # return a random colour
    import random
    return tuple(random.random() for _ in range(3))


def create_demographics_plot(df, output_dir: str = None,
                             format: str = "jpg", dpi: int = 150,
                             verbose: bool = True):
    """Create a demographics plot of the trajectory.csv data held
    in the passed pandas dataframe. This returns the figure for you
    to save if desired (or just call ``plt.show()`` to show
    it in Jupyter)

    Parameters
    ----------
    df : Pandas Dataframe
        The pandas dataframe containing the data from trajectory.csv.bz2
    output_dir: str
        The name of the directory in which to draw the graph. If this
        is set then the graph is written to a file and the filename
        is returned instead of the figure.
    format: str
        Format to save the figure in if output_dir is supplied. A
        request for "jpg" falls back to "png" if 'pillow' (PIL) cannot
        be imported.
    dpi: int
        dpi (dots per inch) resolution to save the figure with if
        a bitmap format is used and output_dir is supplied
    verbose: bool
        Whether or not to print progress to the screen

    Returns
    -------
    fig
        The matplotlib figure containing the demographics plot, or the
        filename if output_dir was supplied
    """
    try:
        import PIL  # noqa - disable unused warning
    except ImportError:
        if format == "jpg":
            print(
                "WARNING: Missing 'pillow' package, defaulting to PNG format.")
            format = "png"

    _, plt = import_graphics_modules()

    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 10))

    # see if any of the demographic names give colour names
    demographics = df.pivot(index="date", columns="demographic",
                            values="day").columns
    colors = [get_color(demographic, position)
              for position, demographic in enumerate(demographics)]

    for position, column in enumerate(["E", "I", "IW", "R"]):
        # fill the 2x2 grid row by row
        row, col = divmod(position, 2)

        table = df.pivot(index="date", columns="demographic", values=column)
        ax = table.plot.line(ax=axes[row][col], color=colors)
        ax.tick_params('x', labelrotation=90)
        ax.set_ylabel("Population")
        ax.set_title(column)

    fig.tight_layout(pad=1)

    if not output_dir:
        return fig

    import os
    filename = os.path.join(output_dir, f"demographics.{format}")

    if verbose:
        print(f"Saving figure {filename}")

    fig.savefig(filename, dpi=dpi)
    plt.close()

    return filename
def save_summary_plots(results: str, output_dir: str = None,
                       format: str = "jpg", dpi: int = 150,
                       align_axes: bool = True,
                       verbose=False):
    """Create summary plots of the data contained in the passed
    'results.csv.bz2' file that was produced by metawards and
    save them to disk.

    Overview and average plots are written if the data has a
    "fingerprint" column, and a demographics plot is written if it has
    a "demographic" column.

    Parameters
    ----------
    results: str
        The full path to the file containing the results. This
        **must** have been created by ``metawards``
    output_dir: str
        Path to the directory in which you want to place the graphs.
        This defaults to the same directory that contains 'results'
        (or the current directory if 'results' has no directory part)
    format: str
        The format to use to save the graphs. This defaults to 'jpg'.
        Passing None selects 'pdf'. A request for 'jpg' falls back to
        'png' if 'pillow' (PIL) cannot be imported.
    dpi: int
        The dots-per-inch to use when saving bitmap graphics (e.g.
        png, jpg etc)
    align_axes: bool
        Whether or not to plot all graphs in a set on the same axes
    verbose: bool
        Whether or not to print progress to the screen. This is also
        passed on to the individual plotting functions.

    Returns
    -------
    filenames: List(str)
        Full file paths of all of the files written by this function
    """
    import os

    if format is None:
        format = "pdf"

    try:
        import PIL  # noqa - disable unused warning
    except ImportError:
        if format == "jpg":
            print(
                "WARNING: Missing 'pillow' package, defaulting to PNG format.")
            format = "png"

    pd, _ = import_graphics_modules(verbose=verbose)

    if verbose:
        print(f"Reading data from {results}...")

    df = pd.read_csv(results)

    if output_dir is None:
        output_dir = os.path.dirname(results)

    if not output_dir:
        # 'results' was a bare filename
        output_dir = "."

    filenames = []

    def _record(written):
        """Add the filename(s) returned by a plotting function."""
        if isinstance(written, dict):
            filenames.extend(written.values())
        elif written is not None:
            filenames.append(written)

    # is this an output from multiple runs?
    has_fingerprint = "fingerprint" in df.columns

    # does this have demographic data?
    has_demographics = "demographic" in df.columns

    if has_fingerprint:
        if verbose:
            print("Creating overview plot(s)...")

        _record(create_overview_plot(df, output_dir=output_dir,
                                     format=format, dpi=dpi,
                                     align_axes=align_axes,
                                     verbose=verbose))

        if verbose:
            print("Creating average plot(s)...")

        _record(create_average_plot(df, output_dir=output_dir,
                                    format=format, dpi=dpi,
                                    align_axes=align_axes,
                                    verbose=verbose))

    if has_demographics:
        _record(create_demographics_plot(df, output_dir=output_dir,
                                         format=format, dpi=dpi,
                                         verbose=verbose))

    return filenames