Source code for metawards._inputfiles


from dataclasses import dataclass as _dataclass
from typing import Dict as _Dict
from typing import List as _List

# Names exported by a star-import of this module
__all__ = ["InputFiles"]

# NOTE(review): not referenced anywhere in the visible code; presumably
# intended as a cache of loaded input files - confirm before relying on it
_inputfiles = {}

# Folder within the data repository that InputFiles.load searches
# when it is called without an explicit 'folder' argument
_default_folder_name = "model_data"


def _expand_path(json_file):
    """Return 'json_file' as an absolute path string, with any leading
       '~' expanded to the user's home directory
    """
    from pathlib import Path

    # expand '~' first, then anchor relative paths to the
    # current working directory
    with_home = Path(json_file).expanduser()
    anchored = with_home.absolute()

    return str(anchored)


def _is_description_json(json_file):
    """Return whether or not this json file contains description data.

       The file is read first as a bz2-compressed text file and, if
       that fails, as a plain text file. The first line that (after
       stripping whitespace) starts with '{' marks the file as
       description data (returns True), while a first line starting
       with '[' marks it as something else (returns False).

       Returns False if 'json_file' is not an existing regular file,
       or if the end of the file is reached without finding either
       bracket (e.g. the file is empty).
    """
    import os

    if not os.path.isfile(json_file):
        return False

    def _opens_with_object(handle):
        """Scan the lines of 'handle' for the first opening bracket"""
        # iterating the handle stops at end-of-file, unlike looping
        # on readline(), which returns "" forever once the file is
        # exhausted
        for raw_line in handle:
            text = raw_line.strip()

            if text.startswith("["):
                return False
            elif text.startswith("{"):
                return True

        # reached the end of the file without seeing '{' or '['
        return False

    try:
        import bz2
        with bz2.open(json_file, "rt") as FILE:
            return _opens_with_object(FILE)
    except Exception:
        # deliberately best-effort: any failure here is taken to mean
        # the file is not bz2-compressed, so retry as plain text below
        pass

    with open(json_file, "r") as FILE:
        return _opens_with_object(FILE)


@_dataclass
class InputFiles:
    """This class holds all of the input files that must be loaded
       from METAWARDSDATA to construct the network of wards
       and links between them

       Load using the InputFiles.load function e.g.

       Examples
       --------
       >>> infiles = InputFiles.load("2011Data")
       >>> print(infiles)
       * Model: 2011Data
       * loaded from: <full path to description.json>
       * repository: https://github.com/metawards/MetaWardsData
       * repository_branch: main
       * repository_version: 0.2.0
       etc.
    """
    #: File from which to read the work matrix of links
    work: str = None
    #: File from which to read the play matrix of links
    play: str = None
    #: File from which to read all of the ward names
    identifier: str = None
    #: File from which to read all of the secondary
    #: ward IDs, (Communities, Counties, Districts, UA's etc)
    identifier2: str = None
    #: File from which to read the weekend matrix of links
    weekend: str = None
    #: File from which to read the size of the population in the work file
    work_size: str = None
    #: File from which to read the size of the population in the play file
    play_size: str = None
    #: File from which to read the positions (locations) of the wards
    #: (the centre of the bounding boxes)
    position: str = None
    #: Coordinates-system used for the positions. Should be 'x/y' or 'lat/long'
    coordinates: str = None
    #: File to look up metadata about the wards (e.g. their names)
    lookup: str = None
    #: Which columns in this file have the data
    lookup_columns: _Dict[str, int] = None
    #: File from which to read the values to seed the wards
    seed: str = None
    #: File from which to read the list of nodes to track
    nodes_to_track: str = None
    #: UV file
    uv: str = None
    #: Whether or not this is the special "single" ward model
    is_single: bool = False
    #: The json file containing the wards data if this is a ward_json
    wards_data: str = None

    _filename: str = None            # Full path of the description.json file
    _model_name: str = None          # Name of the model
    _model_path: str = None          # Directory containing the input files
    _model_version: str = None       # Version loaded from the data
    _authors: str = None             # Author(s) of this data
    _contacts: str = None            # Contact(s) for this data
    _references: str = None          # References for this data
    _repository: str = None          # GitHub repository for this data
    _repository_version: str = None  # Git version of the data
    _repository_branch: str = None   # Git branch for the data

    @property
    def is_model_dir(self) -> bool:
        """Whether a model name has been set for this object"""
        return self._model_name is not None

    @property
    def is_wards_data(self) -> bool:
        """Whether this object refers to a single wards-data json file"""
        return self.wards_data is not None

    def model_name(self):
        """Return the name of this model"""
        if self.is_wards_data:
            return "custom"
        elif self.is_single:
            return "single"
        else:
            return self._model_name

    def model_path(self):
        """Return the path to the directory containing this model"""
        if self.is_wards_data or self.is_single:
            return None
        else:
            return self._model_path

    def model_version(self):
        """Return the version of the data in this model"""
        if self.is_wards_data or self.is_single:
            return None
        else:
            return self._model_version

    def __str__(self):
        """Return a human-readable, multi-line summary of this model"""
        if self.is_single:
            return "Model: single ward"
        elif self.is_wards_data:
            import os
            filename = os.path.basename(self.wards_data)
            return f"Model: {filename}"
        else:
            return f"""* Model: {self._model_name}
* loaded from: {self._filename}
* repository: {self._repository}
* repository_branch: {self._repository_branch}
* repository_version: {self._repository_version}
* work: {self.work}
* play: {self.play}
* identifier: {self.identifier}
* identifier2: {self.identifier2}
* weekend: {self.weekend}
* work_size: {self.work_size}
* play_size: {self.play_size}
* position: {self.position}
* coordinates: {self.coordinates}
* lookup: {self.lookup}
* lookup_columns: {self.lookup_columns}
* seed: {self.seed}
* nodes_to_track: {self.nodes_to_track}"""

    def __repr__(self):
        """Return a short, single-line representation of this model"""
        if self.is_single:
            return "InputFiles::single"
        elif self.is_wards_data:
            import os
            filename = os.path.basename(self.wards_data)
            return f"InputFiles(wards_data='{filename}')"
        else:
            return f"InputFiles(model='{self._model_name}')"

    def __hash__(self):
        """Hash this object using only the file it was loaded from"""
        return hash(self._filename)

    def _localise(self):
        """Localise the filenames in this input files set. This will
           prepend the model path to every filename and will also
           double-check that all files exist

           Raises
           ------
           FileNotFoundError
             If any of the (non-empty) filenames does not refer to
             an existing file once joined onto the model path
        """
        if self.is_single or self.is_wards_data:
            return

        import os

        # public, non-callable attributes that do not hold filenames
        not_files = ["coordinates", "lookup_columns", "is_model_dir",
                     "is_wards_data", "wards_data", "is_single"]

        # dir() yields names alphabetically, so files are checked
        # (and the first missing one is reported) in that order
        members = [attr for attr in dir(self)
                   if not callable(getattr(self, attr))
                   and not attr.startswith("_")]

        for member in members:
            if member in not_files:
                continue

            filename = getattr(self, member)

            if not filename:
                # this file was not specified for the model
                continue

            filename = os.path.join(self._model_path, filename)

            if not os.path.isfile(filename):
                raise FileNotFoundError(
                    f"Cannot find input file {member} = {filename}")

            setattr(self, member, filename)

    @staticmethod
    def load(model: str = "2011Data",
             repository: str = None,
             folder: str = None,
             description: str = "description.json",
             filename: str = None):
        """Load the parameters associated with the passed model.
           This will look for the parameters specified in
           the json file called f"{repository}/{folder}/{model}/{description}"

           By default this will load the 2011Data parameters
           from $HOME/GitHub/model_data/2011Data/description.json

           Alternatively you can provide the full path to the
           description json file using the 'filename' argument.
           All files within this description will be searched
           for using the directory that contains that file
           as a base

           If the file that is found is not a description file (its
           first bracket is not '{') then it is treated as a wards
           data file, and an InputFiles that just points at that
           file is returned.

           Parameters
           ----------
           model: str
             The name of the model data to load. This is the name that
             will be searched for in the METAWARDSDATA model_data directory.
             The special name "single" returns the single-ward model
             without reading any files. If this is the path of an
             existing file, then that file is loaded directly
           repository: str
             The location of the cloned METAWARDSDATA repository
           folder: str
             The name of the folder within the METAWARDSDATA repository
             that contains the model data
           description: str
             The name of the description json file within the
             model's directory
           filename: str
             The name of the file to load the model data from - this directly
             loads this file without searching through the METAWARDSDATA
             repository

           Returns
           -------
           input_files: InputFiles
             The constructed and validated set of input files

           Raises
           ------
           IOError
             If the file to load does not exist
           FileNotFoundError
             If the description file cannot be read or parsed, or if
             any of the files it refers to cannot be found
        """
        import os

        repository_version = None
        repository_branch = None

        if model == "single":
            # This is the special 'single-ward' model - just return
            # a basic InputFiles
            return InputFiles(is_single=True)

        if filename is None:
            from ._parameters import get_repository

            if folder is None or os.path.isabs(model):
                filename = model
            else:
                filename = os.path.join(folder, model)

            if os.path.isfile(filename):
                # 'model' is itself the path of a file to load
                filename = _expand_path(filename)
            else:
                # look for the model in the data repository
                repository, v = get_repository(repository)

                if folder is None:
                    folder = _default_folder_name

                filename = os.path.join(repository, folder,
                                        model, description)

                filename = _expand_path(filename)

                repository = v["repository"]
                repository_version = v["version"]
                repository_branch = v["branch"]

        json_file = filename
        model_path = os.path.dirname(filename)

        if not os.path.isfile(json_file):
            from .utils._console import Console
            Console.error(f"Cannot read file {json_file} as it doesn't exist")
            raise IOError(
                f"Cannot load inputfiles as {json_file} doesn't exist")

        if not _is_description_json(json_file):
            # this must be wards data...
            json_file = _expand_path(json_file)
            return InputFiles(wards_data=json_file,
                              _filename=json_file)

        try:
            import json

            try:
                # the description may be bz2-compressed...
                import bz2
                with bz2.open(json_file, "rt") as FILE:
                    files = json.load(FILE)
            except Exception:
                files = None

            if files is None:
                # ...if not, then read it as plain text
                with open(json_file, "r") as FILE:
                    files = json.load(FILE)

        except Exception as e:
            from .utils._console import Console
            Console.error(f"""Could not find the model file {json_file}.
Either it does not exist or was corrupted.
Error was {e.__class__} {e}.
Please see https://metawards.org/model_data for instructions on how
to download and set the model data.""")
            # chain the original error so that the real cause
            # remains visible in the traceback
            raise FileNotFoundError(f"Could not find or read {json_file}: "
                                    f"{e.__class__} {e}") from e

        input_files = InputFiles(
            work=files.get("work", None),
            play=files.get("play", None),
            identifier=files.get("identifier", None),
            identifier2=files.get("identifier2", None),
            weekend=files.get("weekend", None),
            work_size=files.get("work_size", None),
            play_size=files.get("play_size", None),
            position=files.get("position", None),
            coordinates=files.get("coordinates", "x/y"),
            lookup=files.get("lookup", None),
            lookup_columns=files.get("lookup_columns", None),
            seed=files.get("seed", None),
            nodes_to_track=files.get("nodes_to_track", None),
            uv=files.get("uv", None),
            _filename=json_file,
            _model_path=model_path,
            _model_name=files.get("name", model),
            _model_version=files.get("version", "unknown"),
            _references=files.get("reference(s)", "none"),
            _authors=files.get("author(s)", "unknown"),
            _contacts=files.get("contact(s)", "unknown"),
            _repository=repository,
            _repository_version=repository_version,
            _repository_branch=repository_branch)

        # convert the filenames into full paths and check they exist
        input_files._localise()

        return input_files