Extracting by location
Up to this point, we have only looked at the summary data from each
model run. There is a lot more that can be explored, as metawards
models the outbreak in every ward in the model (e.g. every electoral
ward in the UK).
Metadata about those wards is loaded into the network's `info` object,
which is of class `WardInfos`. This class can be queried to get
the index of wards according to their name, their official ID code,
or the local authority or region to which they belong.
Searching using WardInfo
For example, we can find all of the wards that match the name
“Clifton” using the code below. Open up ipython or a Jupyter
notebook and type:
>>> from metawards import Network, Parameters
>>> params = Parameters.load()
>>> params.set_input_files("2011Data")
>>> params.set_disease("lurgy")
>>> network = Network.build(params)
This has now built a network object that you can query, e.g.
>>> clifton = network.info.find("Clifton")
>>> print(clifton)
[154, 403, 829, 3612, 3662, 3670, 3703, 3766, 3974, 3975, 8134, 8327, 8328]
>>> for ward in clifton:
...     print(network.info[ward])
WardInfo(name='Clifton-with-Maidenway', alternate_names=['Clifton-with-Maidenway'], code='E05002101', alternate_codes=['E36000654'], authority='Torbay', authority_code='E06000027', region='', region_code='')
WardInfo(name='Clifton', alternate_names=['Clifton'], code='E05003119', alternate_codes=['E36001870'], authority='Allerdale', authority_code='E07000026', region='', region_code='')
WardInfo(name='Clifton and Bradley', alternate_names=['Clifton and Bradley'], code='E05003350', alternate_codes=['E36002100'], authority='Derbyshire Dales', authority_code='E07000035', region='', region_code='')
WardInfo(name='Skelton, Rawcliffe and Clifton Without', alternate_names=['Skelton, Rawcliffe and Clifton Without'], code='E05001763', alternate_codes=['E36000299'], authority='York', authority_code='E06000014', region='', region_code='')
WardInfo(name='Clifton', alternate_names=['Clifton'], code='E05001980', alternate_codes=['E36000533'], authority='Bristol, City of', authority_code='E06000023', region='', region_code='')
WardInfo(name='Clifton East', alternate_names=['Clifton East'], code='E05001981', alternate_codes=['E36000534'], authority='Bristol, City of', authority_code='E06000023', region='', region_code='')
WardInfo(name='Clifton', alternate_names=['Clifton'], code='E05001747', alternate_codes=['E36000283'], authority='York', authority_code='E06000014', region='', region_code='')
WardInfo(name='Clifton', alternate_names=['Clifton'], code='E05001648', alternate_codes=['E36000184'], authority='Blackpool', authority_code='E06000009', region='', region_code='')
WardInfo(name='Clifton North', alternate_names=['Clifton North'], code='E05001831', alternate_codes=['E36000367'], authority='Nottingham', authority_code='E06000018', region='', region_code='')
WardInfo(name='Clifton South', alternate_names=['Clifton South'], code='E05001832', alternate_codes=['E36000368'], authority='Nottingham', authority_code='E06000018', region='', region_code='')
WardInfo(name='Clifton', alternate_names=['Clifton'], code='E05005188', alternate_codes=['E36003801'], authority='Fylde', authority_code='E07000119', region='', region_code='')
WardInfo(name='Cliftonville East', alternate_names=['Cliftonville East'], code='E05005087', alternate_codes=['E36003700'], authority='Thanet', authority_code='E07000114', region='', region_code='')
WardInfo(name='Cliftonville West', alternate_names=['Cliftonville West'], code='E05005088', alternate_codes=['E36003701'], authority='Thanet', authority_code='E07000114', region='', region_code='')
This has returned all wards that have “Clifton” in the name. The search is
actually performed as a regular expression, and is case-insensitive. You can
pass a regular expression string directly, e.g. `r"^(Clifton)$"` anchors the
match at the beginning (`^`) and end (`$`) of the string, i.e. it only matches
wards whose name is exactly “Clifton” (ignoring case). Try this by typing:
>>> clifton = network.info.find(r"^(Clifton)$")
>>> for ward in clifton:
...     print(network.info[ward])
WardInfo(name='Clifton', alternate_names=['Clifton'], code='E05003119', alternate_codes=['E36001870'], authority='Allerdale', authority_code='E07000026', region='', region_code='')
WardInfo(name='Clifton', alternate_names=['Clifton'], code='E05001980', alternate_codes=['E36000533'], authority='Bristol, City of', authority_code='E06000023', region='', region_code='')
WardInfo(name='Clifton', alternate_names=['Clifton'], code='E05001747', alternate_codes=['E36000283'], authority='York', authority_code='E06000014', region='', region_code='')
WardInfo(name='Clifton', alternate_names=['Clifton'], code='E05001648', alternate_codes=['E36000184'], authority='Blackpool', authority_code='E06000009', region='', region_code='')
WardInfo(name='Clifton', alternate_names=['Clifton'], code='E05005188', alternate_codes=['E36003801'], authority='Fylde', authority_code='E07000119', region='', region_code='')
Using location in an extractor
We can use the above search to track the total number of infections in each
of the wards in Bristol. Create a new python file called location.py
and copy in the below;
# Module-level cache shared by every call to output_location.
# ``matched_wards`` holds the indices of the wards being tracked (it stays
# None until the first successful lookup) and ``headers`` holds the column
# titles that are passed when the output file is opened.
matched_wards = None
headers = []


def output_location(network, population, workspace, output_dir, **kwargs):
    """Write one comma-separated row of per-ward infection totals.

    On the first call the wards to track are looked up: the first ward
    matching the name "clifton" in the authority "bristol" is found, its
    authority code is read, and every ward with that authority code is
    selected. The result is cached in the module-level ``matched_wards``
    and ``headers`` so the lookup is not repeated on later calls.

    Each call then writes ``<day>,<total>,<total>,...`` followed by a
    newline to the file "bristol.dat" obtained from ``output_dir``.

    Args:
        network: object whose ``info`` attribute supports
            ``find(name=..., authority=...)`` and indexing by ward index.
        population: object providing the current ``day``.
        workspace: object whose ``total_inf_ward`` is indexable by ward
            index.
        output_dir: object whose ``open(filename, headers=..., sep=...)``
            returns a handle with a ``write`` method.
        **kwargs: accepted and ignored.

    Raises:
        IndexError: if no ward matches the name/authority search.
    """
    ward_name = "clifton"
    authority = "bristol"

    global matched_wards, headers

    if matched_wards is None:
        # This is performed only once, when this function is first called
        found = network.info.find(name=ward_name, authority=authority)

        if not found:
            # Same exception type that indexing the empty result would
            # raise, but with a message that says what went wrong
            raise IndexError(
                f"No ward matching name '{ward_name}' was found in "
                f"authority '{authority}'")

        # Use the first match to obtain the authority's code, then search
        # on that code to collect every ward in the same authority
        authority_code = network.info[found[0]].authority_code
        wards = network.info.find(authority=authority_code)

        # Build the headers before publishing the cache, so a failure here
        # cannot leave matched_wards set while headers is still empty
        headers = ["day"] + [f"'{network.info[w].name}'" for w in wards]
        matched_wards = wards

    # open the file called "authority.dat", e.g. "bristol.dat"
    # Note we are using comma separators and have put the ward
    # names in single quotes to make the output easier to parse
    locfile = output_dir.open(f"{authority}.dat", headers=headers, sep=",")

    # Assemble the whole row and write it in one go
    totals = workspace.total_inf_ward
    row = [str(population.day)]
    row.extend(str(totals[w]) for w in matched_wards)
    locfile.write(",".join(row) + "\n")
def extract_location(**kwargs):
    """Return the default extractors with ``output_location`` appended.

    All keyword arguments are forwarded unchanged to
    ``metawards.extractors.extract_default``; ``output_location`` is added
    to the end of the list it returns.
    """
    from metawards.extractors import extract_default

    extractors = extract_default(**kwargs)
    return extractors + [output_location]
Save the file and run metawards
using this extractor via
metawards --extractor location
You should see that a new output file called bristol.dat.bz2 was
created in the output directory. Loading this up into pandas should show:
>>> import pandas as pd
>>> df = pd.read_csv("output/bristol.dat.bz2", index_col="day")
>>> print(df)
'Brislington West' 'Cabot' ... 'Whitchurch Park' 'Windmill Hill'
day ...
0 0 0 ... 0 0
1 0 0 ... 0 0
2 0 0 ... 0 0
3 0 0 ... 0 0
4 0 0 ... 0 0
[5 rows x 35 columns]