# Source code for metawards.analysis._summary_plot


# Names exported by ``from <this module> import *``. The ``get_color``
# helper defined in this file is deliberately absent from this list.
__all__ = ["import_graphics_modules",
           "save_summary_plots",
           "create_overview_plot",
           "create_average_plot",
           "create_demographics_plot"]


def import_graphics_modules(verbose=False):
    """Import pandas and matplotlib in a safe way, giving a good
       error message if something goes wrong.

       Parameters
       ----------
       verbose: bool
         Whether or not to print to the screen to signal progress...

       Returns
       -------
       (pd, plt)
          The pandas (pd) and matplotlib.pyplot (plt) modules

       Raises
       ------
       ImportError
         If pandas or matplotlib cannot be imported. Installation hints
         are printed first, and the underlying import failure is attached
         as the cause of the raised exception.
    """
    if verbose:
        print("Importing graphics modules...")

    try:
        import pandas as pd
        import matplotlib.pyplot as plt
    except ImportError as err:
        print("You must have pandas and matplotlib installed to run "
              "metawards-plot")
        print("Install using either `pip install pandas matplotlib` if you "
              "are using")
        print("pip, or 'conda install pandas matplotlib' if you are using "
              "conda, ")
        print("or by running 'metawards-install --optional'")
        # chain the original failure so the traceback shows which of the
        # two modules was actually missing (or broken)
        raise ImportError("Cannot produce the plot as pandas and matplotlib "
                          "are not installed.") from err

    return (pd, plt)


def create_overview_plot(df, output_dir: str = None,
                         format: str = "jpg", dpi: int = 150,
                         align_axes: bool = True, verbose: bool = True):
    """Create a summary plot of the result.csv data held in the
       passed pandas dataframe. This returns the figure for you
       to save if desired (or just call ``plt.show()`` to show
       it in Jupyter)

       Each figure is a 2x2 grid with one panel for each of the
       "E", "I", "IW" and "R" columns, plotted against "date".
       One line is drawn per "repeat" or, if the dataframe has no
       "fingerprint" column, per "demographic".

       If the dataframe contains multiple fingerprints, then this
       will return a dictionary of figures, one for each fingerprint,
       indexed by fingerprint

       Parameters
       ----------
       df : Pandas Dataframe
         The pandas dataframe containing the data from results.csv.bz2
       output_dir: str
         The name of the directory in which to draw the graphs. If this
         is set then the graphs are written to files as they are generated
         and the filenames of the figures are returned. This is necessary
         when the number of graphs to draw is high and you don't want
         to waste too much memory
       format: str
         Format to save the figures in if output_dir is supplied. If
         this is "jpg" and the 'pillow' package cannot be imported
         then "png" is used instead
       dpi: int
         dpi (dots per inch) resolution to save the figures with if
         a bitmap format is used and output_dir is supplied
       align_axes: bool
         If true (default) then this will ensure that all of the plots
         for different fingerprints are put on the same axis scale.
         This only has an effect if there is more than one fingerprint
       verbose: bool
         Whether or not to print progress to the screen

       Returns
       -------
       fig
         The matplotlib figure containing the summary plot, or a
         dictionary of figures if there are multiple fingerprints,
         or the filename if output_dir was supplied, or a dictionary
         of multiple filenames indexed by fingerprint. None is
         returned if there was nothing to plot
    """
    _, plt = import_graphics_modules()

    try:
        fingerprints = df["fingerprint"].unique()
        repeat = "repeat"
    except KeyError:
        # no fingerprint column - plot the whole dataframe as a single
        # figure with one line per demographic
        fingerprints = [None]
        repeat = "demographic"

    try:
        import PIL        # noqa - disable unused warning
    except ImportError:
        if format == "jpg":
            print(
                "WARNING: Missing 'pillow' package, defaulting to PNG format.")
            format = "png"

    columns = ["E", "I", "IW", "R"]

    nfigs = len(fingerprints)
    multiple = nfigs > 1
    share_axes = multiple and align_axes

    min_date = None
    max_date = None
    min_y = {}
    max_y = {}

    if share_axes:
        # first pass: find the axis limits that cover every fingerprint
        # NOTE(review): the x limits come from the "day" column while the
        # lines below are plotted against "date" - confirm these agree
        for fingerprint in fingerprints:
            df2 = df[df["fingerprint"] == fingerprint]

            # the day range is the same for every column, so it only
            # needs to be calculated once per fingerprint
            min_d = df2["day"].min()
            max_d = df2["day"].max()

            if min_date is None:
                min_date = min_d
                max_date = max_d
            else:
                if min_d < min_date:
                    min_date = min_d
                if max_d > max_date:
                    max_date = max_d

            for column in columns:
                min_val = df2[column].min()
                max_val = df2[column].max()

                if column not in min_y:
                    min_y[column] = min_val
                    max_y[column] = max_val
                else:
                    if min_val < min_y[column]:
                        min_y[column] = min_val
                    if max_val > max_y[column]:
                        max_y[column] = max_val

    if multiple:
        # only needed to build the per-fingerprint titles
        from metawards import VariableSet

    figs = {}

    for fingerprint in fingerprints:
        if fingerprint is None:
            df2 = df
        else:
            df2 = df[df["fingerprint"] == fingerprint]

        fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 10))

        if multiple:
            # the title prefix is the same for all four panels
            fvals, _rpt = VariableSet.extract_values(fingerprint)

        # axes.flat walks the 2x2 grid row by row, so the columns are
        # placed as E, I on the top row and IW, R on the bottom row
        for column, axis in zip(columns, axes.flat):
            ax = df2.pivot(index="date", columns=repeat,
                           values=column).plot.line(ax=axis)
            ax.tick_params('x', labelrotation=90)

            legend = ax.get_legend()
            if legend is not None:
                legend.remove()

            ax.set_ylabel("Population")

            if share_axes:
                ax.set_xlim(min_date, max_date)
                # leave 10% headroom above the largest value
                ax.set_ylim(min_y[column], 1.1*max_y[column])

            if multiple:
                ax.set_title(f"{fvals} : {column}")
            else:
                ax.set_title(column)

        fig.tight_layout(pad=1)

        if output_dir:
            import os

            if nfigs == 1:
                filename = os.path.join(output_dir, f"overview.{format}")
            else:
                filename = os.path.join(output_dir,
                                        f"overview_{fingerprint}.{format}")

            if verbose:
                print(f"Saving figure {filename}")

            fig.savefig(filename, dpi=dpi)
            # close this specific figure (not just whichever figure is
            # "current") so that memory is released as we go
            plt.close(fig)
            figs[fingerprint] = filename
        else:
            if verbose:
                print(f"Created the figure for {fingerprint}")

            figs[fingerprint] = fig

    if not figs:
        return None
    elif len(figs) == 1:
        # a single result is returned directly rather than in a dictionary
        return next(iter(figs.values()))
    else:
        return figs


def create_average_plot(df, output_dir: str = None, format: str = "jpg",
                        dpi: int = 150, align_axes: bool = True,
                        verbose: bool = True):
    """Create an average plot of the result.csv data held in the
       passed pandas dataframe. This returns the figure for you
       to save if desired (or just call ``plt.show()`` to show
       it in Jupyter)

       Each figure is a 2x2 grid showing, for each of the "E", "I",
       "IW" and "R" columns, the mean over all repeats for each date,
       with the standard deviation drawn as error bars.

       Note that a figure is only created for a fingerprint that has
       more than one repeat of the model run in the output. If no
       fingerprint has multiple repeats then this will return None

       If the dataframe contains multiple fingerprints, then this
       will return a dictionary of figures, one for each fingerprint,
       indexed by fingerprint

       Parameters
       ----------
       df : Pandas Dataframe
         The pandas dataframe containing the data from results.csv.bz2.
         This must contain a "fingerprint" column
       output_dir: str
         The name of the directory in which to draw the graphs. If this
         is set then the graphs are written to files as they are generated
         and the filenames of the figures are returned. This is necessary
         when the number of graphs to draw is high and you don't want
         to waste too much memory
       format: str
         Format to save the figures in if output_dir is supplied. If
         this is "jpg" and the 'pillow' package cannot be imported
         then "png" is used instead
       dpi: int
         dpi (dots per inch) resolution to save the figures with if
         a bitmap format is used and output_dir is supplied
       align_axes: bool
         Accepted for consistency with the other plotting functions.
         It is currently not used by this function
       verbose: bool
         Whether or not to print progress to the screen

       Returns
       -------
       fig
         The matplotlib figure containing the average plot, or a
         dictionary of figures if there are multiple fingerprints,
         or the filename if output_dir was supplied, or a dictionary
         of multiple filenames indexed by fingerprint, or None if
         no figure was created
    """
    try:
        import PIL        # noqa - disable unused warning
    except ImportError:
        if format == "jpg":
            print(
                "WARNING: Missing 'pillow' package, defaulting to PNG format.")
            format = "png"

    fingerprints = df["fingerprint"].unique()

    columns = ["E", "I", "IW", "R"]

    figs = {}

    nfigs = len(fingerprints)

    # matplotlib is only imported once we know there is something to plot
    plt = None

    for fingerprint in fingerprints:
        df2 = df[df["fingerprint"] == fingerprint]

        if len(df2["repeat"].unique()) <= 1:
            # an average over a single repeat is meaningless
            continue

        if plt is None:
            _, plt = import_graphics_modules()

        fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 10))

        # restrict the statistics to the columns that are plotted, so that
        # non-numeric columns (e.g. a text fingerprint) cannot break the
        # aggregation
        by_date = df2.groupby("date")[columns]
        mean_average = by_date.mean()
        stddev = by_date.std()

        # axes.flat walks the 2x2 grid row by row
        for column, axis in zip(columns, axes.flat):
            ax = mean_average.plot.line(y=column, yerr=stddev[column],
                                        ax=axis)
            ax.tick_params('x', labelrotation=90)

            legend = ax.get_legend()
            if legend is not None:
                legend.remove()

            ax.set_title(column)
            ax.set_ylabel("Population")

        fig.tight_layout(pad=1)

        if output_dir:
            import os

            if nfigs == 1:
                filename = os.path.join(output_dir, f"average.{format}")
            else:
                filename = os.path.join(output_dir,
                                        f"average_{fingerprint}.{format}")

            if verbose:
                print(f"Saving figure {filename}")

            fig.savefig(filename, dpi=dpi)
            # close this specific figure so memory is released as we go
            plt.close(fig)
            figs[fingerprint] = filename
        else:
            if verbose:
                print(f"Created the figure for {fingerprint}")

            figs[fingerprint] = fig

    if not figs:
        return None
    elif len(figs) == 1:
        # a single result is returned directly rather than in a dictionary
        return next(iter(figs.values()))
    else:
        return figs


def get_color(name=None, idx=None):
    """Choose a plotting colour from a name and/or a position (idx).

       The name is compared after converting it to a string, stripping
       surrounding whitespace and lower-casing it:

       * "overall" is always drawn in black
       * a name that is itself one of the recognised colour names is
         returned as that (normalised) name
       * otherwise idx is used to pick from a fixed palette of six
         colours. If idx is None or falls outside of the palette then
         a random (r, g, b) tuple of floats is returned instead
    """
    key = str(name).strip().lower()

    if key == "overall":
        return "black"

    recognised = ("red", "blue", "green", "orange", "yellow", "black",
                  "white", "gray", "pink")

    if key in recognised:
        return key

    palette = ("red", "blue", "green", "orange", "pink", "gray")

    if idx is None or idx < 0 or idx >= len(palette):
        # nothing sensible to index with, so make up a random colour
        import random
        return tuple(random.random() for _ in range(3))

    return palette[int(idx)]


def create_demographics_plot(df, output_dir: str = None,
                             format: str = "jpg", dpi: int = 150,
                             verbose: bool = True):
    """Create a demographics plot of the trajectory.csv data held in the
       passed pandas dataframe. This returns the figure for you
       to save if desired (or just call ``plt.show()`` to show
       it in Jupyter)

       The figure is a 2x2 grid with one panel for each of the
       "E", "I", "IW" and "R" columns, plotted against "date" with
       one line per demographic. Line colours are chosen by
       ``get_color`` from the demographic's name and position.

       Parameters
       ----------
       df : Pandas Dataframe
         The pandas dataframe containing the data from trajectory.csv.bz2
       output_dir: str
         The name of the directory in which to draw the graph. If this
         is set then the graph is written to a file called
         "demographics.{format}" in this directory and the filename
         is returned
       format: str
         Format to save the figure in if output_dir is supplied. If
         this is "jpg" and the 'pillow' package cannot be imported
         then "png" is used instead
       dpi: int
         dpi (dots per inch) resolution to save the figure with if
         a bitmap format is used and output_dir is supplied
       verbose: bool
         Whether or not to print progress to the screen

       Returns
       -------
       fig
         The matplotlib figure containing the demographics plot, or
         the filename if output_dir was supplied
    """
    try:
        import PIL        # noqa - disable unused warning
    except ImportError:
        if format == "jpg":
            print(
                "WARNING: Missing 'pillow' package, defaulting to PNG format.")
            format = "png"

    _, plt = import_graphics_modules()

    columns = ["E", "I", "IW", "R"]

    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 10))

    # pivot once just to discover the demographics in the same order
    # that the per-column pivots below will produce them, so that
    # colors[k] lines up with the k-th plotted line
    demographics = df.pivot(index="date", columns="demographic",
                            values="day").columns

    # see if any of these give colour names
    colors = [get_color(demographic, i)
              for i, demographic in enumerate(demographics)]

    # axes.flat walks the 2x2 grid row by row
    for column, axis in zip(columns, axes.flat):
        ax = df.pivot(index="date", columns="demographic",
                      values=column).plot.line(ax=axis, color=colors)
        ax.tick_params('x', labelrotation=90)
        ax.set_ylabel("Population")
        ax.set_title(column)

    fig.tight_layout(pad=1)

    if output_dir:
        import os

        filename = os.path.join(output_dir, f"demographics.{format}")

        if verbose:
            print(f"Saving figure {filename}")

        fig.savefig(filename, dpi=dpi)
        # close this specific figure rather than whichever is "current"
        plt.close(fig)
        return filename

    return fig


def save_summary_plots(results: str, output_dir: str = None,
                       format: str = "jpg", dpi: int = 150,
                       align_axes: bool = True,
                       verbose=False):
    """Create summary plots of the data contained in the passed
       'results.csv.bz2' file that was produced by metawards
       and save them to disk.

       Overview and average plots are written if the data contains
       a "fingerprint" column, and a demographics plot is written
       if the data contains a "demographic" column.

       Parameters
       ----------
       results: str
         The full path to the file containing the results. This
         **must** have been created by ``metawards``
       output_dir: str
         Path to the directory in which you want to place the graphs.
         This defaults to the same directory that contains 'results'
         (or the current directory if 'results' has no directory part)
       format: str
         The format to use to save the graphs. This defaults to 'jpg',
         falling back to 'png' if the 'pillow' package cannot be
         imported. If None is passed then 'pdf' is used
       dpi: int
         The dots-per-inch to use when saving bitmap graphics (e.g.
         png, jpg etc)
       align_axes: bool
         Whether or not to plot all graphs in a set on the same axes
       verbose: bool
         Whether or not to print progress to the screen

       Returns
       -------
       filenames: List(str)
         Full file paths of all of the files written by this function
    """
    try:
        import PIL        # noqa - disable unused warning
    except ImportError:
        if format == "jpg":
            print(
                "WARNING: Missing 'pillow' package, defaulting to PNG format.")
            format = "png"

    pd, _ = import_graphics_modules(verbose=verbose)
    import os

    if verbose:
        print(f"Reading data from {results}...")

    df = pd.read_csv(results)

    if output_dir is None:
        output_dir = os.path.dirname(results)

        if not output_dir:
            output_dir = "."

    if format is None:
        format = "pdf"

    filenames = []

    def _record(result):
        # the plotting functions return None, one filename, or a
        # dictionary of filenames indexed by fingerprint
        if isinstance(result, dict):
            filenames.extend(result.values())
        elif result is not None:
            filenames.append(result)

    # is this an output from multiple runs?
    has_fingerprint = "fingerprint" in df.columns

    # does this have demographic data?
    has_demographics = "demographic" in df.columns

    if has_fingerprint:
        if verbose:
            print("Creating overview plot(s)...")

        # forward 'verbose' so that verbose=False really is quiet
        _record(create_overview_plot(df, output_dir=output_dir,
                                     format=format, dpi=dpi,
                                     align_axes=align_axes,
                                     verbose=verbose))

        if verbose:
            print("Creating average plot(s)...")

        _record(create_average_plot(df, output_dir=output_dir,
                                    format=format, dpi=dpi,
                                    align_axes=align_axes,
                                    verbose=verbose))

    if has_demographics:
        _record(create_demographics_plot(df, output_dir=output_dir,
                                         format=format, dpi=dpi,
                                         verbose=verbose))

    return filenames