From 954ac8ec1e3073705bc1bf0d89d5f6585a8c9424 Mon Sep 17 00:00:00 2001 From: Andrew Smith Date: Mon, 27 May 2024 13:55:15 +0100 Subject: [PATCH] Post1.4.1 (#84) * doc updates * add python 3.12 and fix warnings * add RunState enum * update CI, tidy macros * vuln+fixes --- .github/workflows/build-test.yml | 6 +- .github/workflows/coverage.yml | 8 +- .github/workflows/draft-pdf.yml | 4 +- .github/workflows/mpi-test.yml | 6 +- .github/workflows/pypi-release.yml | 2 +- LICENCE.md | 2 +- docs/api.md | 2 +- docs/developer.md | 2 +- docs/examples/boids.md | 17 +- docs/examples/mortality.md | 2 +- docs/examples/people.md | 2 +- docs/examples/riskpaths.md | 2 +- docs/macros.py | 163 ++++--- docs/overview.md | 86 +++- docs/requirements.txt | 12 +- docs/tips.md | 41 +- examples/boids/boids2d.py | 335 +++++++------- examples/boids/boids3d.py | 424 +++++++++-------- examples/boids/run.py | 17 +- examples/chapter1/model.py | 5 +- examples/chapter1/person.py | 133 +++--- examples/competing/model.py | 7 +- examples/competing/people.py | 91 ++-- examples/competing/visualise.py | 54 ++- examples/conway/conway.py | 136 +++--- examples/conway/run_model.py | 6 +- examples/daisyworld/daisyworld.py | 271 ++++++----- examples/hello_world/model.py | 171 +++---- examples/infection/infection.py | 369 ++++++++------- examples/infection/run.py | 22 +- examples/markov_chain/markov_chain.py | 100 ++-- examples/markov_chain/model.py | 19 +- examples/markov_chain/visualisation.py | 33 +- examples/mortality/model.py | 16 +- examples/mortality/people.py | 206 +++++---- examples/mortality/plot.py | 107 +++-- examples/n-body/__init__.py | 0 examples/option/black_scholes.py | 237 +++++----- examples/option/helpers.py | 16 +- examples/option/model.py | 19 +- examples/parallel/model.py | 7 +- examples/parallel/parallel.py | 141 +++--- examples/people/model.py | 18 +- examples/people/population.py | 359 ++++++++------- examples/people/pyramid.py | 102 +++-- examples/riskpaths/data.py | 77 +++- examples/riskpaths/model.py | 6 +- examples/riskpaths/riskpaths.py | 8 +- examples/riskpaths/visualisation.py | 37 +- examples/schelling/model.py | 7 +- examples/schelling/schelling.py | 180 ++++---- examples/wolf_sheep/model.py | 47 +- examples/wolf_sheep/wolf_sheep.py | 583 ++++++++++++----------- mkdocs.yml | 27 +- neworder/__init__.py | 35 +- neworder/__init__.pyi | 329 ++++++++----- neworder/df.pyi | 35 +- neworder/domain.py | 482 ++++++++++--------- neworder/geospatial.py | 111 +++-- neworder/mc.py | 21 +- neworder/mpi.pyi | 27 +- neworder/stats.pyi | 43 +- neworder/time.pyi | 31 +- pyproject.toml | 11 +- src/Model.cpp | 35 +- src/Model.h | 87 ++-- src/Module.cpp | 14 +- src/Module.h | 1 - src/Module_docstr.cpp | 7 + test/benchmark.py | 176 +++---- test/conftest.py | 9 +- test/test_df.py | 213 +++++---- test/test_domain.py | 210 ++++----- test/test_geospatial.py | 9 +- test/test_mc.py | 612 +++++++++++++------------ test/test_model.py | 118 +++-- test/test_module.py | 99 ++-- test/test_mpi.py | 131 +++--- test/test_stats.py | 23 +- test/test_timeline.py | 587 +++++++++++++----------- untested/people_multi/people_multi.md | 2 +- 81 files changed, 4580 insertions(+), 3628 deletions(-) delete mode 100644 examples/n-body/__init__.py diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index 7ca3a039..bc9a4246 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -18,12 +18,12 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.10", "3.11"] + python-version: 
["3.10", "3.11", "3.12"] os: [ubuntu-latest, windows-latest, macos-latest] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: "pip: Python ${{ matrix.python-version }} / ${{ matrix.os }}" - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 3a07b589..9c6735c9 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -14,11 +14,11 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.10" ] + python-version: [ "3.11" ] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: "pip: Python ${{ matrix.python-version }} coverage" - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies @@ -32,5 +32,7 @@ jobs: run: | python -m pytest - name: Upload + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} run: | bash <(curl -s https://codecov.io/bash) -Z diff --git a/.github/workflows/draft-pdf.yml b/.github/workflows/draft-pdf.yml index 4f65c342..c553884f 100644 --- a/.github/workflows/draft-pdf.yml +++ b/.github/workflows/draft-pdf.yml @@ -8,14 +8,14 @@ jobs: name: Paper Draft steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Build draft PDF uses: openjournals/openjournals-draft-action@master with: journal: joss paper-path: paper/paper.md - name: Upload - uses: actions/upload-artifact@v1 + uses: actions/upload-artifact@v4 with: name: paper path: paper/paper.pdf \ No newline at end of file diff --git a/.github/workflows/mpi-test.yml b/.github/workflows/mpi-test.yml index 4ef2f06a..1b53b90e 100644 --- a/.github/workflows/mpi-test.yml +++ b/.github/workflows/mpi-test.yml @@ -14,11 +14,11 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.10"] + python-version: ["3.11"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: "pip: Python ${{ matrix.python-version }}" - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml index ac6aaeb6..ab7d5c79 100644 --- a/.github/workflows/pypi-release.yml +++ b/.github/workflows/pypi-release.yml @@ -10,7 +10,7 @@ jobs: if: "!contains(github.event.head_commit.message, 'Bump version')" runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: persist-credentials: false - name: current_version diff --git a/LICENCE.md b/LICENCE.md index bc30613d..65aafd08 100644 --- a/LICENCE.md +++ b/LICENCE.md @@ -1,6 +1,6 @@ # MIT License -Copyright © 2017-2023 Andrew P Smith +Copyright © 2017-2024 Andrew P Smith Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/docs/api.md b/docs/api.md index 39b1c4fa..fc9e77e8 100644 --- a/docs/api.md +++ b/docs/api.md @@ -7,4 +7,4 @@ ## Type stubs -Type stubs were generated for the core C++ library using the `pybind11-stubgen` package, although significant editing of the output was required. See `__init__.pyi` for details. +Type stubs were generated for the core C++ library using the `pybind11-stubgen` package, with minor manual corrections. See `__init__.pyi` for details. 
diff --git a/docs/developer.md b/docs/developer.md
index 75e23e25..a5927021 100644
--- a/docs/developer.md
+++ b/docs/developer.md
@@ -90,7 +90,7 @@ The script from [codecov.io](https://codecov.io/gh/virgesmith/neworder/) uses `g
 
 ## Generating type stubs
 
-Type stubs can be generated for the C++ module using `pybind11-stubgen`, although manual modifications are needed for the output (e.g. docstrings for overloaded functions are misplaced, numpy types need to be fixed).
+Type stubs can be generated for the C++ module using `pybind11-stubgen`, although manual modifications may be needed; numpy types also need to be fixed globally.
 
 ```sh
 pybind11-stubgen _neworder_core --ignore-invalid all
diff --git a/docs/examples/boids.md b/docs/examples/boids.md
index 0aa4dbcd..b5238c34 100644
--- a/docs/examples/boids.md
+++ b/docs/examples/boids.md
@@ -22,33 +22,22 @@ Each entity travels at a fixed speed in a 2- or 3-dimensional constrained univer
 
 (if a separation is required, the boid will not attempt to align or cohere)
 
-The entities are stored in a pandas `DataFrame` and use `neworder.Space` to update positions. There are no explicit `for` loops in the model - all position and velocity calculations are "vectorised"* for efficiency.
+The entities are stored in a pandas `DataFrame` and use `neworder.Space` to update positions. There are no explicit `for` loops in the model - all position and velocity calculations are "vectorised"[^1] for efficiency.
 
-* in this context "vectorisation" merely means the avoidance of explicit loops in an interpreted language. The actual implementation may be compiled to assembly language, vectorised in the true ([SIMD](https://en.wikipedia.org/wiki/SIMD)) sense, parallelised, optimised in other ways, or any combination thereof.
+[^1]: in this context "vectorisation" merely means the avoidance of explicit loops in an interpreted language. The actual implementation may be compiled to assembly language, vectorised in the true ([SIMD](https://en.wikipedia.org/wiki/SIMD)) sense, parallelised, optimised in other ways, or any combination thereof.
 
 Run like so
 
 ```sh
 python examples/boids/run.py 2d
 ```
-
-The 2d version utilises a wrap-around domain and does not implement the reversion step.
-
 or
 
 ```sh
 python examples/boids/run.py 3d
 ```
 
-which runs
-
-{{ include_snippet("examples/boids/run.py") }}
-
-and this is the 3-d implementation:
-
-{{ include_snippet("examples/boids/boids3d.py") }}
-
-A 2-d implementation is also provided in `examples/boids/boids2d.py`.
+The 2d version utilises a wrap-around domain and so does not require the reversion step.
 
 ## Outputs
diff --git a/docs/examples/mortality.md b/docs/examples/mortality.md
index 7dd98b4f..ebd8201d 100644
--- a/docs/examples/mortality.md
+++ b/docs/examples/mortality.md
@@ -1,6 +1,6 @@
 # Mortality
 
-We implement the example *The Life Table* from the second chapter of the book *Microsimulation and Population Dynamics* [[3]](#references). It models mortality in a homogeneous population with an age-specific mortality rate.
+We implement the example *The Life Table* from the second chapter of the book *Microsimulation and Population Dynamics* [[3]](../references.md). It models mortality in a homogeneous population with an age-specific mortality rate.
 
 This example implements the model in two different ways: first as a discrete case-based microsimulation, and then using a continuous sampling methodology, showcasing how the latter can be much more efficient.
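The efficiency gain of the continuous method comes from replacing a per-timestep loop with a single sampling call over the whole cohort. A minimal sketch, assuming the `first_arrival` usage seen in the competing-risks example and a hypothetical constant hazard:

```python
import numpy as np

import neworder as no


class ContinuousMortality(no.Model):
    def __init__(self, n: int, hazard: np.ndarray, dt: float) -> None:
        # single-step timeline: the entire model evolution happens in one pass
        super().__init__(no.NoTimeline(), no.MonteCarlo.deterministic_identical_stream)
        self.n = n
        self.hazard = hazard  # hypothetical age-specific mortality hazards
        self.dt = dt

    def step(self) -> None:
        # one draw per individual: age at death sampled directly from the
        # piecewise-constant hazard, with no explicit loop over timesteps
        age_at_death = self.mc.first_arrival(self.hazard, self.dt, self.n)
        no.log("mean sampled age at death: %.1f" % np.nanmean(age_at_death))


no.run(ContinuousMortality(100_000, np.full(100, 0.01), 1.0))
```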
Rather than having a class to represent an individual, as would be standard in a MODGEN implementation, individuals are stored in a *pandas* `DataFrame`, which provides fast iteration over the population.

diff --git a/docs/examples/people.md b/docs/examples/people.md
index d14f5eca..c5f4ec3f 100644
--- a/docs/examples/people.md
+++ b/docs/examples/people.md
@@ -6,7 +6,7 @@
 
 In this example, the input data is a csv file containing a microsynthesised 2011 population of Newcastle generated from UK census data, by area (MSOA), age, gender and ethnicity. The transitions modelled are: ageing, births, deaths and migrations, over a period of 40 years to 2051.
 
-Births, deaths and migrations (applied in that order) are modelled using Monte-Carlo simulation (sampling Poisson processes in various ways) using distributions parameterised by age, sex and ethnicity-specific fertility, mortality and migration rates respectively, which are largely fictitious (but inspired by data from the NewETHPOP[[1]](#references.md) project).
+Births, deaths and migrations (applied in that order) are modelled using Monte-Carlo simulation (sampling Poisson processes in various ways), with distributions parameterised by age-, sex- and ethnicity-specific fertility, mortality and migration rates respectively, which are largely fictitious (but inspired by data from the NewETHPOP[[1]](../references.md) project).
 
 For the fertility model newborns simply inherit their mother's location and ethnicity, are born aged zero, and have a randomly selected gender (with even probability). The migration model is an 'in-out' model, i.e. it is not a full origin-destination model. Flows are either inward from 'elsewhere' or outward to 'elsewhere'.
 
diff --git a/docs/examples/riskpaths.md b/docs/examples/riskpaths.md
index 157e56c2..035962b9 100644
--- a/docs/examples/riskpaths.md
+++ b/docs/examples/riskpaths.md
@@ -1,6 +1,6 @@
 # RiskPaths
 
-RiskPaths is a well-known MODGEN model that is primarily used for teaching purposes and described here[[5]](#references) in terms of the model itself and here in terms of implementation[[6]](#references). It models fertility in soviet-era eastern Europe, examining fertility as a function of time and union state. In the model, a woman can enter a maximum of two unions in her lifetime. The first union is divided into two sections: a (deterministic) 3 year period during which fertility is at a maximum, followed by a (stochastic) period with lower fertility.
+RiskPaths is a well-known MODGEN model, primarily used for teaching purposes, that is described both in terms of the model itself[[5]](../references.md) and in terms of its implementation[[6]](../references.md). It models fertility in Soviet-era eastern Europe, examining fertility as a function of time and union state. In the model, a woman can enter a maximum of two unions in her lifetime. The first union is divided into two sections: a (deterministic) 3-year period during which fertility is at a maximum, followed by a (stochastic) period with lower fertility.
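The union and fertility dynamics described above map naturally onto the Monte-Carlo engine's arrival sampling. A minimal sketch (not the actual RiskPaths implementation), borrowing the `arrivals` call from the competing-risks example and using hypothetical hazard values:

```python
import numpy as np

import neworder as no


class FirstUnionBirths(no.Model):
    def __init__(self, n: int) -> None:
        super().__init__(no.NoTimeline(), no.MonteCarlo.deterministic_identical_stream)
        self.n = n
        # hypothetical within-union fertility hazards: 3 years at a maximum,
        # followed by a longer period at a lower rate
        self.fertility_hazard = np.concatenate([np.full(3, 0.4), np.full(12, 0.1)])

    def step(self) -> None:
        # multiple birth events per woman, at least 9 months (0.75 years) apart;
        # columns hold successive arrival times (assumed padded with "never")
        births = self.mc.arrivals(self.fertility_hazard, 1.0, self.n, 0.75)
        no.log("most births to a single woman: %d" % births.shape[1])


no.run(FirstUnionBirths(10_000))
```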
![riskpaths](./img/riskpaths.png) diff --git a/docs/macros.py b/docs/macros.py index 23105b93..ffac86ce 100644 --- a/docs/macros.py +++ b/docs/macros.py @@ -1,90 +1,109 @@ # macros for mkdocs-macros-plugin -import os -import requests import importlib +import os from datetime import datetime +from functools import cache +from typing import Any + +import requests _inline_code_styles = { - ".py": "python", - ".sh": "bash", - ".h": "cpp", - ".cpp": "cpp", - ".c": "c", - ".rs": "rs", - ".js": "js", - ".md": None + ".py": "python", + ".sh": "bash", + ".h": "cpp", + ".cpp": "cpp", + ".c": "c", + ".rs": "rs", + ".js": "js", + ".md": None, } # this is the overall record id, not a specific version -_NEWORDER_ZENODO_ID = 4031821 +_NEWORDER_ZENODO_ID = 4031821 # search using this (or DOI 10.5281/zenodo.4031821) just doesnt work + + +@cache +def get_zenodo_record() -> dict[str, Any]: + try: + response = requests.get( + "https://zenodo.org/api/records", + params={ + "q": "(virgesmith) AND (neworder)", # this is the only query that seems to work + "access_token": os.getenv("ZENODO_PAT"), + }, + ) + response.raise_for_status() + # with open("zenodo-result.json", "w") as fd: + # fd.write(response.text) + return response.json()["hits"]["hits"][0] + except Exception as e: + return {f"{e.__class__.__name__}": f"{e} while retrieving zenodo record"} def write_requirements() -> None: - try: - with open("docs/requirements.txt", "w") as fd: - fd.write(f"""# DO NOT EDIT + try: + with open("docs/requirements.txt", "w") as fd: + fd.write( + f"""\ +# DO NOT EDIT # auto-generated @ {datetime.now()} by docs/macros.py::write_requirements() # required by readthedocs.io -""") - fd.writelines(f"{dep}=={importlib.metadata.version(dep)}\n" for dep in [ - "mkdocs", - "mkdocs-macros-plugin", - "mkdocs-material", - "mkdocs-material-extensions", - "mkdocs-video", - "requests" - ]) - # ignore any error, this should only run in a dev env anyway - except: - pass +""" + ) + fd.writelines( + f"{dep}=={importlib.metadata.version(dep)}\n" + for dep in [ + "mkdocs", + "mkdocs-macros-plugin", + "mkdocs-material", + "mkdocs-material-extensions", + "mkdocs-video", + "requests", + ] + ) + # ignore any error, this should only run in a dev env anyway + except: # noqa: E722 + pass def define_env(env): + @env.macro + def insert_zenodo_field(*keys: str) -> Any: + result = get_zenodo_record() + for key in keys: + result = result[key] + return result - @env.macro - def insert_zenodo_field(*keys: str): - """ This is the *released* version not the dev one """ - try: - response = requests.get('https://zenodo.org/api/records', params={'q': _NEWORDER_ZENODO_ID, 'access_token': os.getenv("ZENODO_PAT")}) - response.raise_for_status() - result = response.json()["hits"]["hits"][0] - for k in keys: - result = result[k] - return result + @env.macro + def include_snippet(filename, tag=None, show_filename=True): + """looks for code in between lines containing "!!" """ + full_filename = os.path.join(env.project_dir, filename) - except Exception as e: - return f"{e.__class__.__name__}:{e} while retrieving {keys}" - - - @env.macro - def include_snippet(filename, tag=None, show_filename=True): - """ looks for code in between lines containing "!!" """ - full_filename = os.path.join(env.project_dir, filename) - - _, file_type = os.path.splitext(filename) - # default to literal "text" for inline code style - code_style = _inline_code_styles.get(file_type, "text") - - with open(full_filename, 'r') as f: - lines = f.readlines() - - if tag: - tag = f"!{tag}!" 
- span = [] - for i, l in enumerate(lines): - if tag in l: - span.append(i) - if len(span) != 2: - return f"```ERROR {filename} ({code_style}) too few/many tags ({len(span)}) for '{tag}'```" - lines = lines[span[0] + 1: span[1]] - - if show_filename: - footer = f"\n[file: **{filename}**]\n" - else: - footer = "" - if code_style is not None: - return f"```{code_style}\n{''.join(lines)}```{footer}" - else: - return "".join(lines) + footer - -write_requirements() + _, file_type = os.path.splitext(filename) + # default to literal "text" for inline code style + code_style = _inline_code_styles.get(file_type, "text") + + with open(full_filename, "r") as f: + lines = f.readlines() + + if tag: + tag = f"!{tag}!" + span = [] + for i, line in enumerate(lines): + if tag in line: + span.append(i) + if len(span) != 2: + return f"```ERROR {filename} ({code_style}) too few/many tags ({len(span)}) for '{tag}'```" + lines = lines[span[0] + 1 : span[1]] + + if show_filename: + title = f'title="{filename}"' + else: + title = "" + if code_style is not None: + return f"```{code_style} {title}\n{''.join(lines)}```" + else: + return "".join(lines) + + +# write_requirements() diff --git a/docs/overview.md b/docs/overview.md index b7200316..693dc5e1 100644 --- a/docs/overview.md +++ b/docs/overview.md @@ -30,13 +30,89 @@ This is provided by: *neworder*'s timeline is conceptually a sequence of steps that are iterated over (calling the Model's `step` and (optionally) `check` methods at each iteration, plus the `finalise` method at the last time point, which is commonly used to post-process the raw model data at the end of the model run. Timelines should not be incremented in client code, this happens automatically within the model. -The framework provides four types of timeline, and is extensible: +The framework is extensible but provides four types of timeline (implemented in C++): - `NoTimeline`: an arbitrary one-step timeline which is designed for continuous-time models in which the model evolution is computed in a single step - `LinearTimeline`: a set of equally-spaced intervals in non-calendar time - `NumericTimeline`: a fully-customisable non-calendar timeline allowing for unequally-spaced intervals - `CalendarTimeline`: a timeline based on calendar dates with with (multiples of) daily, monthly or annual intervals +``` mermaid +classDiagram + Timeline <|-- NoTimeline + Timeline <|-- LinearTimeline + Timeline <|-- NumericTimeline + Timeline <|-- CalendarTimeline + Timeline <|-- CustomTimeline + + class Timeline { + +int index + +bool at_end* + +float dt* + +Any end* + +float nsteps* + +Any start* + +Any time* + +_next() None* + +__repr__() str* + } + + class NoTimeline { + +bool at_end + +float dt + +Any end + +float nsteps + +Any start + +Any time + +_next() + +__repr__() str + } + + class LinearTimeline { + +bool at_end + +float dt + +Any end + +float nsteps + +Any start + +Any time + +_next() + +__repr__() str + } + + class NumericTimeline { + +bool at_end + +float dt + +Any end + +float nsteps + +Any start + +Any time + +_next() + +__repr__() str + } + + class CalendarTimeline { + +bool at_end + +float dt + +Any end + +float nsteps + +Any start + +Any time + +_next() + +__repr__() str + } + + class CustomTimeline { + +bool at_end + +float dt + +Any end + +float nsteps + +Any start + +Any time + +_next() + +__repr__() str + } +``` + !!! 
note "Calendar Timelines" - Calendar timelines do not provide intraday resolution - Monthly increments preserve the day of the month (where possible) @@ -45,7 +121,7 @@ The framework provides four types of timeline, and is extensible: #### Custom timelines -If none of the supplied timelines are suitable, users can implement their own, inheriting from the abstract `neworder.Timeline` base class, which also provides an `index` property. The following must be implemented in the subclass: +If none of the supplied timelines are suitable, users can implement their own, deriving from the abstract `neworder.Timeline` base class, which provides an `index` property that should not be overidden. The following properties and methods must be overridden in the subclass: symbol | type | description -----------|-------------------|--- @@ -105,17 +181,17 @@ the following can also be optionally implemented in the model: Pretty much everything else is entirely up to the model developer. While the module is completely agnostic about the format of data, the library functions accept and return *numpy* arrays, and it is recommended to use *pandas* dataframes where appropriate in order to be able to use the fast data manipulation functionality provided. -Like MODGEN, both time-based and case-based models are supported. In the latter, the timeline refers not to absolute time but the age of the cohort. Additionally continuous-time models can be implemented, using a "null `NoTimeline` (see above) with only a single transition, and the Monte-Carlo library specifically provides functions for continuous sampling, e.g. from non-homogeneous Poisson processes. +Like MODGEN, both time-based and case-based models are supported. In the latter, the timeline refers not to absolute time but the age of the cohort. Additionally continuous-time models can be implemented, using a `NoTimeline` (see above) with only a single transition, and the Monte-Carlo library specifically provides functions for continuous sampling, e.g. from non-homogeneous Poisson processes. New users should take a look at the examples, which cover a range of applications including implementations of some MODGEN teaching models. ## Data and Performance -*neworder* is written in C++ with the python bindings provided by the *pybind11* package. As python and C++ have very different memory models, it's generally not advisable to directly share data, i.e. to safely have a python object and a C++ object both referencing (and potentially modifying) the same memory location. Thus *neworder* class member variables are accessible only via member functions and results are returned by value (i.e. copied). However, there is a crucial exception to this: the *numpy* `ndarray` type. This is fundamental to the operation of the framework, as it enables the C++ module to directly access (and modify) both *numpy* arrays and *pandas* data frames, facilitiating very fast implementation of algorithms operating directly on *pandas* DataFrames.* +*neworder* is written in C++ with the python bindings provided by the *pybind11* package. As python and C++ have very different memory models, it's generally not advisable to directly share data, i.e. to safely have a python object and a C++ object both referencing (and potentially modifying) the same memory location. Thus *neworder* class member variables are accessible only via member functions and results are returned by value (i.e. copied). However, there is a crucial exception to this: the *numpy* `ndarray` type. 
This is fundamental to the operation of the framework, as it enables the C++ module to directly access (and modify) both *numpy* arrays and *pandas* data frames, facilitiating very fast implementation of algorithms operating directly on *pandas* DataFrames.[^1] !!! note "Explicit Loops" To get the best performance, avoid using explicit loops in python code where "vectorised" *neworder* (or e.g. numpy) functions can be used instead. You should also bear in mind that while python is a *dynamically typed* language, C++ is *statically typed*. If an argument to a *neworder* method is not the correct type, it will fail immediately (as opposed to python, which will fail only if an invalid operation for the given type is attempted). Note also that `neworder`'s python code has type annotations. -* the `neworder.df.transition` function is *over 2 or 3 orders of magnitude faster* than a (naive) equivalent python implementation depending on the length of the dataset, and still an order of magnitude faster than an optimised python implementation. +[^1]: the `neworder.df.transition` function is *over 2 or 3 orders of magnitude faster* than a (naive) equivalent python implementation depending on the length of the dataset, and still an order of magnitude faster than an optimised python implementation. diff --git a/docs/requirements.txt b/docs/requirements.txt index 2fec0295..e87611fb 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,9 +1,9 @@ # DO NOT EDIT -# auto-generated @ 2023-08-30 07:58:47.663213 by docs/macros.py::write_requirements() +# auto-generated @ 2024-05-21 08:49:05.458940 by docs/macros.py::write_requirements() # required by readthedocs.io -mkdocs==1.5.2 -mkdocs-macros-plugin==1.0.4 -mkdocs-material==9.2.5 -mkdocs-material-extensions==1.1.1 +mkdocs==1.6.0 +mkdocs-macros-plugin==1.0.5 +mkdocs-material==9.5.24 +mkdocs-material-extensions==1.3.1 mkdocs-video==1.5.0 -requests==2.31.0 +requests==2.32.1 diff --git a/docs/tips.md b/docs/tips.md index fa76d2a5..29c52ae1 100644 --- a/docs/tips.md +++ b/docs/tips.md @@ -5,21 +5,21 @@ !!! warning "Base Model Initialisation" When instantiating the model subclass, it is essential that the `neworder.Model` base class is explicitly initialised. It must be supplied with a `Timeline` object and (optionally) a seeding function for the Monte-Carlo engine. Failure to do this will result in a runtime error. -For example, use this initialisation pattern: +Ensure the `neworder.Model` base class is properly initialised: -```python +```python title="Model initialisation" class MyModel(neworder.Model): def __init__(self, args...) -> None: timeline = ... # initialise an appropriate timeline - seeder = ... # (optional) set an appropriate seeding function - # this line is essential: - super().__init__(timeline, seeder) - # now initialise the subclass... + super().__init__(timeline) # (1)! + ... # now initialise the subclass ``` +1. :material-alert: this line is essential + ## Custom Seeding Strategies -!!! note "Note" +!!! note "Random number generator" *neworder* random streams use the Mersenne Twister pseudorandom generator, as implemented in the C++ standard library. *neworder* provides three basic seeding functions which initialise the model's random stream so that they are either non-reproducible (`neworder.MonteCarlo.nondeterministic_stream`), or reproducible and either identical (`neworder.MonteCarlo.deterministic_identical_stream`) or independent across parallel runs (`neworder.MonteCarlo.deterministic_independent_stream`). 
    Typically, a user would select identical streams (and perturbed inputs) for sensitivity analysis, and independent streams (with identical inputs) for convergence analysis.
 
@@ -99,17 +99,30 @@ x = self.nprand.normal(size=5)
 
 NB as there is only one RNG state, you can safely get independent variates when calling both the RNG directly and via numpy.
 
-## Conditional Halting
+## Ending the model run
+
+Models will continue to run until the end of their timeline is reached, unless explicitly told otherwise (see next section).
+
+!!! Note "Finalisation"
+    The model's `finalise` method can be optionally implemented as necessary, for example to write results to a file. It is automatically called by the *neworder* runtime **only** when the end of the timeline is reached.
 
-In some models, rather than (or as well as) evolving the population over a fixed timeline, it may make more sense to iterate timesteps until some condition is met. The "Schelling" example illustrates this - it runs until all agents are in a satisfied state.
+## Open-ended timelines and Conditional Halting
 
-In these situations, the model developer can (conditionally) call the `Model.halt()` method from inside the model's `step()` method, which will end the model run. Currently, the `LinearTimeline` and `CalendarTimeline` classes support both fixed and open-ended timelines.
+In some models, rather than (or as well as) evolving the population over a fixed timeline, it may make more sense to iterate timesteps until some condition is met. The "Schelling" example illustrates this - it runs until all agents are in a satisfied state. Currently, the inbuilt `LinearTimeline` and `CalendarTimeline` classes support both fixed and open-ended timelines. In other cases, it may be useful to temporarily exit the model and resume it later.
+
+The model's `halt` method can be used to stop the model run. In these situations, the `step` method should contain logic that (conditionally) calls `halt`.
 
 !!! note "`Model.halt()`"
-    This function *does not* end execution immediately, it signals to the *neworder* runtime not to iterate any further timesteps. This means that the entire body of the `step` method (and the `check` method, if implemented) will still be executed. Overriding the `halt` method is not recommended.
+    This function *does not* end execution immediately; it signals to the *neworder* runtime not to iterate any further timesteps. Calling `halt` means that:
 
-!!! Note "Finalisation"
-    The `finalise` method is automatically called by the *neworder* runtime only when the end of the timeline. As open-ended timelines never reach this state, the method must can be called explicitly, if needed.
+    - the entire body of the `step` method (and the `check` method, if implemented) will still run for the current timestep,
+    - the `finalise` method, even if implemented, will **not** be executed,
+    - the `neworder.run` method will then exit.
+
+Overriding the `halt` method should not be necessary and is not recommended. The `finalise` method, if needed, must be called explicitly for models with open-ended timelines that have been `halt`ed.
+
+!!! Note "Resuming execution"
+    A model that has previously been `halt`ed but has not reached the end of its timeline can be resumed by passing it to `neworder.run` again. Attempting to resume a model that has reached the end of its timeline will result in a `StopIteration` exception.
 
 ## Deadlocks
 
@@ -148,7 +161,7 @@ neworder.log(neworder.time.isnever(n)) # True
 
 ## Data Types
 
 !!! 
warning "Static typing" - Unlike python, C++ is a *statically typed* language and so *neworder* is strict about types. We strongly encourage the use of type annotations and a type checker (mypy) in python. + Unlike python, C++ is a *statically typed* language and so *neworder* is strict about types. We strongly encourage the use of type annotations and a type checker (e.g. mypy) in python. If an argument to a *neworder* method or function is not the correct type, it will fail immediately (as opposed to python, which will fail only if an invalid operation for the given type is attempted (a.k.a. "duck typing")). This applies to contained types (numpy's `dtype`) too. In the example below, the function is expecting an integer, and will complain if you pass it a floating-point argument: diff --git a/examples/boids/boids2d.py b/examples/boids/boids2d.py index 897056c5..f50843df 100644 --- a/examples/boids/boids2d.py +++ b/examples/boids/boids2d.py @@ -1,162 +1,189 @@ from __future__ import annotations -from typing import Any + from time import sleep +from typing import Any + +import matplotlib.pyplot as plt # type: ignore import numpy as np import pandas as pd -import neworder as no -import matplotlib.pyplot as plt # type: ignore from matplotlib.colors import ListedColormap -class Boids2d(no.Model): - - ALIGN_COEFF = 0.1 - COHERE_COEFF = 2 - SEPARATE_COEFF = .001 - AVOID_COEFF = 0.05 - - def __init__(self, N: int, range: float, vision: float, exclusion: float, speed: float) -> None: - super().__init__(no.LinearTimeline(0.0, 0.01), no.MonteCarlo.nondeterministic_stream) - - self.N = N - self.range = range - self.vision = vision - self.exclusion = exclusion - self.speed = speed - - # continuous wraparound 2d space - self.domain = no.Space(np.zeros(2), np.full(2, self.range), edge=no.Edge.WRAP) - - self.N_predators = 2 - self.paused = False - - # initially in [0,1]^dim - self.boids = pd.DataFrame( - index=pd.Index(name="id", data=no.df.unique_index(self.N)), - data={ - "x": self.mc.ustream(N) * self.range, - "y": self.mc.ustream(N) * self.range, - "vx": self.mc.ustream(N) - 0.5, - "vy": self.mc.ustream(N) - 0.5, - "c": 0.0 - } - ) - - self.__normalise() - - self.fig, self.g = self.__init_visualisation() - - # suppress division by zero warnings - np.seterr(divide='ignore') - - def step(self) -> None: - if self.paused: - sleep(0.2) - self.__update_visualisation() - return - - d2, (dx, dy) = self.domain.dists2((self.boids.x, self.boids.y)) - np.fill_diagonal(d2, np.inf) # no self-influence - - # separate - too_close = d2 < self.exclusion**2 - self.__separate(too_close, d2, dx, dy) - - # avoid predator - in_range = d2 < self.vision ** 2 - self.__avoid(in_range, d2, dx, dy) - - # mask those that needed to separate - in_range = np.logical_and(d2 < self.vision ** 2, ~too_close).astype(float) - - self.__cohere(in_range, dx, dy) - self.__align(in_range) - - self.__normalise() - - # set colours - self.boids.c = 0 - self.boids.loc[in_range[0:self.N_predators].sum(axis=0) != 0, "c"] = 1/3 - self.boids.loc[too_close[0:self.N_predators].sum(axis=0) != 0, "c"] = 2/3 - self.boids.loc[0:self.N_predators - 1, "c"] = 1 - - (self.boids.x, self.boids.y), (self.boids.vx, self.boids.vy) = self.domain.move( - (self.boids.x, self.boids.y), - (self.boids.vx, self.boids.vy), - self.timeline.dt, - ungroup=True - ) - - sleep(0.001) - self.__update_visualisation() - - def __align(self, in_range: np.ndarray) -> None: - weights = 1.0 / np.sum(in_range, axis=0) - weights[weights == np.inf] = 0.0 - - mean_vx = (in_range * 
self.boids.vx.values) @ weights - mean_vy = (in_range * self.boids.vy.values) @ weights - - self.boids.vx += mean_vx * Boids2d.ALIGN_COEFF - self.boids.vy += mean_vy * Boids2d.ALIGN_COEFF - - def __cohere(self, in_range: np.ndarray, dx: np.ndarray, dy: np.ndarray) -> None: - weights = 1.0 / np.sum(in_range, axis=0) - weights[weights == np.inf] = 0.0 - x = (in_range * dx) @ weights - y = (in_range * dy) @ weights - - self.boids.vx += x * Boids2d.COHERE_COEFF - self.boids.vy += y * Boids2d.COHERE_COEFF - - def __separate(self, in_range: np.ndarray, d2: np.ndarray, dx: np.ndarray, dy: np.ndarray) -> None: - # TODO clip d2? - # impact on v is proportional to 1/f - f = Boids2d.SEPARATE_COEFF / d2 * in_range - self.boids.vx += (f * dx).sum(axis=0) - self.boids.vy += (f * dy).sum(axis=0) - - def __avoid(self, in_range: np.ndarray, d2: np.ndarray, dx: np.ndarray, dy: np.ndarray) -> None: - f = Boids2d.AVOID_COEFF / d2[0:self.N_predators, :] * in_range[0:self.N_predators, :] - self.boids.vx += (f * dx[0:self.N_predators, :]).sum(axis=0) - self.boids.vy += (f * dy[0:self.N_predators, :]).sum(axis=0) - - def __normalise(self) -> None: - - norm = np.clip(np.sqrt(self.boids.vx ** 2 + self.boids.vy ** 2), a_min = 0.00001, a_max=None) - self.boids.vx *= self.speed / norm - self.boids.vy *= self.speed / norm - - # predators are faster - self.boids.loc[0:self.N_predators - 1, "vx"] *= 1.3 - self.boids.loc[0:self.N_predators - 1, "vy"] *= 1.3 - - def __init_visualisation(self) -> tuple[Any, Any]: - plt.ion() - - fig = plt.figure(constrained_layout=True, figsize=(8, 8)) - g = plt.quiver(self.boids.x, self.boids.y, - self.boids.vx / self.speed, self.boids.vy / self.speed, - scale=75, width=0.005, headwidth=2, - cmap=ListedColormap(["k", "green", "orange", "r"])) - - plt.xlim(0.0, self.range) - plt.ylim(0.0, self.range) - plt.axis("off") +import neworder as no - fig.canvas.flush_events() - def on_keypress(event: Any) -> None: - if event.key == "p": - self.paused = not self.paused - elif event.key == "q": - self.halt() - else: - no.log("%s doesnt do anything. 
p to pause/resume, q to quit" % event.key) - - fig.canvas.mpl_connect('key_press_event', on_keypress) - return fig, g - - def __update_visualisation(self) -> None: - self.g.set_offsets(np.c_[self.boids.x, self.boids.y]) - self.g.set_UVC(self.boids.vx / self.speed, self.boids.vy / self.speed, self.boids.c.values) - self.fig.canvas.flush_events() +class Boids2d(no.Model): + ALIGN_COEFF = 0.1 + COHERE_COEFF = 2 + SEPARATE_COEFF = 0.001 + AVOID_COEFF = 0.05 + + def __init__( + self, N: int, range: float, vision: float, exclusion: float, speed: float + ) -> None: + super().__init__( + no.LinearTimeline(0.0, 0.01), no.MonteCarlo.nondeterministic_stream + ) + + self.N = N + self.range = range + self.vision = vision + self.exclusion = exclusion + self.speed = speed + + # continuous wraparound 2d space + self.domain = no.Space(np.zeros(2), np.full(2, self.range), edge=no.Edge.WRAP) + + self.N_predators = 2 + self.paused = False + + # initially in [0,1]^dim + self.boids = pd.DataFrame( + index=pd.Index(name="id", data=no.df.unique_index(self.N)), + data={ + "x": self.mc.ustream(N) * self.range, + "y": self.mc.ustream(N) * self.range, + "vx": self.mc.ustream(N) - 0.5, + "vy": self.mc.ustream(N) - 0.5, + "c": 0.0, + }, + ) + + self.__normalise() + + self.fig, self.g = self.__init_visualisation() + + # suppress division by zero warnings + np.seterr(divide="ignore") + + def step(self) -> None: + if self.paused: + sleep(0.2) + self.__update_visualisation() + return + + d2, (dx, dy) = self.domain.dists2((self.boids.x, self.boids.y)) + np.fill_diagonal(d2, np.inf) # no self-influence + + # separate + too_close = d2 < self.exclusion**2 + self.__separate(too_close, d2, dx, dy) + + # avoid predator + in_range = d2 < self.vision**2 + self.__avoid(in_range, d2, dx, dy) + + # mask those that needed to separate + in_range = np.logical_and(d2 < self.vision**2, ~too_close).astype(float) + + self.__cohere(in_range, dx, dy) + self.__align(in_range) + + self.__normalise() + + # set colours + self.boids.c = 0.0 + self.boids.loc[in_range[0 : self.N_predators].sum(axis=0) != 0, "c"] = 1 / 3 + self.boids.loc[too_close[0 : self.N_predators].sum(axis=0) != 0, "c"] = 2 / 3 + self.boids.loc[0 : self.N_predators - 1, "c"] = 1 + + (self.boids.x, self.boids.y), (self.boids.vx, self.boids.vy) = self.domain.move( + (self.boids.x, self.boids.y), + (self.boids.vx, self.boids.vy), + self.timeline.dt, + ungroup=True, + ) + + sleep(0.001) + self.__update_visualisation() + + def __align(self, in_range: np.ndarray) -> None: + weights = 1.0 / np.sum(in_range, axis=0) + weights[weights == np.inf] = 0.0 + + mean_vx = (in_range * self.boids.vx.values) @ weights + mean_vy = (in_range * self.boids.vy.values) @ weights + + self.boids.vx += mean_vx * Boids2d.ALIGN_COEFF + self.boids.vy += mean_vy * Boids2d.ALIGN_COEFF + + def __cohere(self, in_range: np.ndarray, dx: np.ndarray, dy: np.ndarray) -> None: + weights = 1.0 / np.sum(in_range, axis=0) + weights[weights == np.inf] = 0.0 + x = (in_range * dx) @ weights + y = (in_range * dy) @ weights + + self.boids.vx += x * Boids2d.COHERE_COEFF + self.boids.vy += y * Boids2d.COHERE_COEFF + + def __separate( + self, in_range: np.ndarray, d2: np.ndarray, dx: np.ndarray, dy: np.ndarray + ) -> None: + # TODO clip d2? 
+ # impact on v is proportional to 1/f + f = Boids2d.SEPARATE_COEFF / d2 * in_range + self.boids.vx += (f * dx).sum(axis=0) + self.boids.vy += (f * dy).sum(axis=0) + + def __avoid( + self, in_range: np.ndarray, d2: np.ndarray, dx: np.ndarray, dy: np.ndarray + ) -> None: + f = ( + Boids2d.AVOID_COEFF + / d2[0 : self.N_predators, :] + * in_range[0 : self.N_predators, :] + ) + self.boids.vx += (f * dx[0 : self.N_predators, :]).sum(axis=0) + self.boids.vy += (f * dy[0 : self.N_predators, :]).sum(axis=0) + + def __normalise(self) -> None: + norm = np.clip( + np.sqrt(self.boids.vx**2 + self.boids.vy**2), a_min=0.00001, a_max=None + ) + self.boids.vx *= self.speed / norm + self.boids.vy *= self.speed / norm + + # predators are faster + self.boids.loc[0 : self.N_predators - 1, "vx"] *= 1.3 + self.boids.loc[0 : self.N_predators - 1, "vy"] *= 1.3 + + def __init_visualisation(self) -> tuple[Any, Any]: + plt.ion() + + fig = plt.figure(constrained_layout=True, figsize=(8, 8)) + g = plt.quiver( + self.boids.x, + self.boids.y, + self.boids.vx / self.speed, + self.boids.vy / self.speed, + scale=75, + width=0.005, + headwidth=2, + cmap=ListedColormap(["k", "green", "orange", "r"]), + ) + + plt.xlim(0.0, self.range) + plt.ylim(0.0, self.range) + plt.axis("off") + + fig.canvas.flush_events() + + def on_keypress(event: Any) -> None: + if event.key == "p": + self.paused = not self.paused + elif event.key == "q": + self.halt() + else: + no.log( + "%s doesnt do anything. p to pause/resume, q to quit" % event.key + ) + + fig.canvas.mpl_connect("key_press_event", on_keypress) + + return fig, g + + def __update_visualisation(self) -> None: + self.g.set_offsets(np.c_[self.boids.x, self.boids.y]) + self.g.set_UVC( + self.boids.vx / self.speed, self.boids.vy / self.speed, self.boids.c.values + ) + self.fig.canvas.flush_events() diff --git a/examples/boids/boids3d.py b/examples/boids/boids3d.py index 5b3ab515..8d26b1ea 100644 --- a/examples/boids/boids3d.py +++ b/examples/boids/boids3d.py @@ -1,203 +1,251 @@ from __future__ import annotations -from typing import Any + from time import sleep +from typing import Any + +import matplotlib.pyplot as plt # type: ignore import numpy as np import pandas as pd + import neworder as no -import matplotlib.pyplot as plt # type: ignore -from matplotlib.colors import ListedColormap _cmap = { - 0: (0.2, 0.2, 0.2, 1.0), - 1: (0.0, 0.7, 0.0, 1.0), - 2: (1.0, 0.6, 0.0, 1.0), - 3: (1.0, 0.0, 0.0, 1.0) + 0: (0.2, 0.2, 0.2, 1.0), + 1: (0.0, 0.7, 0.0, 1.0), + 2: (1.0, 0.6, 0.0, 1.0), + 3: (1.0, 0.0, 0.0, 1.0), } -class Boids3d(no.Model): - ALIGN_COEFF = 0.1 - COHERE_COEFF = 2 - SEPARATE_COEFF = .002 - AVOID_COEFF = 0.1 - REVERT_COEFF = 0.05 +class Boids3d(no.Model): + ALIGN_COEFF = 0.1 + COHERE_COEFF = 2 + SEPARATE_COEFF = 0.002 + AVOID_COEFF = 0.1 + REVERT_COEFF = 0.05 + + def __init__( + self, N: int, range: float, vision: float, exclusion: float, speed: float + ) -> None: + super().__init__( + no.LinearTimeline(0.0, 0.01), no.MonteCarlo.nondeterministic_stream + ) + + self.N = N + self.range = range + self.vision = vision + self.exclusion = exclusion + self.speed = speed + + # unconstrained 3d space + self.domain = no.Space( + np.zeros(3), np.full(3, self.range), edge=no.Edge.UNBOUNDED + ) + + self.N_predators = 1 + + # initially in [0,1]^dim + self.boids = pd.DataFrame( + index=pd.Index(name="id", data=no.df.unique_index(self.N)), + data={ + "x": self.mc.ustream(N) * self.range, + "y": self.mc.ustream(N) * self.range, + "z": self.mc.ustream(N) * self.range, + "vx": self.mc.ustream(N) - 
0.5, + "vy": self.mc.ustream(N) - 0.5, + "vz": self.mc.ustream(N) - 0.5, + "c": 0, + }, + ) + + self.__normalise() + + self.fig, self.g = self.__init_visualisation() + + self.paused = False + + # suppress divsion by zero warnings + np.seterr(divide="ignore") + + def step(self) -> None: + if self.paused: + sleep(0.2) + self.__update_visualisation() + return + + d2, (dx, dy, dz) = self.domain.dists2( + (self.boids.x, self.boids.y, self.boids.z) + ) + np.fill_diagonal(d2, np.inf) # no self-influence + + # separate + too_close = d2 < self.exclusion**2 + self.__separate(too_close, d2, dx, dy, dz) + + # avoid predator + in_range = d2 < self.vision**2 + self.__avoid(in_range, d2, dx, dy, dz) + + # mask those that needed to separate + in_range = np.logical_and(in_range, ~too_close).astype(float) + + self.__cohere(in_range, dx, dy, dz) + self.__align(in_range) + + # favour returning to the origin + self.__revert() + + self.__normalise() + + # set colours + self.boids.c = 0 + self.boids.loc[in_range[0 : self.N_predators].sum(axis=0) != 0, "c"] = 1 + self.boids.loc[too_close[0 : self.N_predators].sum(axis=0) != 0, "c"] = 2 + self.boids.loc[0 : self.N_predators - 1, "c"] = 3 + + ( + (self.boids.x, self.boids.y, self.boids.z), + (self.boids.vx, self.boids.vy, self.boids.vz), + ) = self.domain.move( + (self.boids.x, self.boids.y, self.boids.z), + (self.boids.vx, self.boids.vy, self.boids.vz), + self.timeline.dt, + ungroup=True, + ) + + sleep(0.001) + self.__update_visualisation() + + def __align(self, in_range: np.ndarray) -> None: + weights = 1.0 / np.sum(in_range, axis=0) + weights[weights == np.inf] = 0.0 + + mean_vx = (in_range * self.boids.vx.values) @ weights + mean_vy = (in_range * self.boids.vy.values) @ weights + mean_vz = (in_range * self.boids.vz.values) @ weights + + self.boids.vx += mean_vx * Boids3d.ALIGN_COEFF + self.boids.vy += mean_vy * Boids3d.ALIGN_COEFF + self.boids.vz += mean_vz * Boids3d.ALIGN_COEFF + + def __cohere( + self, in_range: np.ndarray, dx: np.ndarray, dy: np.ndarray, dz: np.ndarray + ) -> None: + weights = 1.0 / np.sum(in_range, axis=0) + weights[weights == np.inf] = 0.0 + x = (in_range * dx) @ weights + y = (in_range * dy) @ weights + z = (in_range * dz) @ weights + + self.boids.vx += x * Boids3d.COHERE_COEFF + self.boids.vy += y * Boids3d.COHERE_COEFF + self.boids.vz += z * Boids3d.COHERE_COEFF + + def __separate( + self, + in_range: np.ndarray, + d2: np.ndarray, + dx: np.ndarray, + dy: np.ndarray, + dz: np.ndarray, + ) -> None: + # TODO clip d2? 
+ # impact on v is proportional to 1/f + f = Boids3d.SEPARATE_COEFF / d2 * in_range + self.boids.vx += (f * dx).sum(axis=0) + self.boids.vy += (f * dy).sum(axis=0) + self.boids.vz += (f * dz).sum(axis=0) + + def __avoid( + self, + in_range: np.ndarray, + d2: np.ndarray, + dx: np.ndarray, + dy: np.ndarray, + dz: np.ndarray, + ) -> None: + f = ( + Boids3d.AVOID_COEFF + / d2[0 : self.N_predators, :] + * in_range[0 : self.N_predators, :] + ) + self.boids.vx += (f * dx[0 : self.N_predators, :]).sum(axis=0) + self.boids.vy += (f * dy[0 : self.N_predators, :]).sum(axis=0) + self.boids.vz += (f * dz[0 : self.N_predators, :]).sum(axis=0) + + def __revert(self) -> None: + """Return to the origin""" + self.boids.vx -= (self.boids.x - self.range / 2) * Boids3d.REVERT_COEFF + self.boids.vy -= (self.boids.y - self.range / 2) * Boids3d.REVERT_COEFF + self.boids.vz -= (self.boids.z - self.range / 2) * Boids3d.REVERT_COEFF + + def __normalise(self) -> None: + # normalise speed + norm = np.clip( + np.sqrt(self.boids.vx**2 + self.boids.vy**2 + self.boids.vz**2), + a_min=0.00001, + a_max=None, + ) + self.boids.vx *= self.speed / norm + self.boids.vy *= self.speed / norm + self.boids.vz *= self.speed / norm + + # predators are faster + self.boids.loc[0 : self.N_predators - 1, "vx"] *= 1.5 + self.boids.loc[0 : self.N_predators - 1, "vy"] *= 1.5 + self.boids.loc[0 : self.N_predators - 1, "vz"] *= 1.5 + + def __init_visualisation(self) -> tuple[Any, Any]: + plt.ion() + + fig = plt.figure(figsize=(10, 10)) + fig.suptitle("[p to pause, q to quit]", y=0.05, x=0.1) + ax = plt.axes() # projection="3d") + + g = ax.scatter( + _project(self.boids.x, self.boids.z, self.range / 2), + _project(self.boids.y, self.boids.z, self.range / 2), + c=self.boids.c.map(_cmap), + s=_size(self.boids.z), + ) + + ax.set_xlim(0.0, self.range) + ax.set_ylim(0.0, self.range) + plt.axis("off") + plt.tight_layout() + fig.canvas.flush_events() + + def on_keypress(event): + if event.key == "p": + self.paused = not self.paused + elif event.key == "q": + self.halt() + else: + no.log( + "%s doesnt do anything. 
p to pause/resume, q to quit" % event.key + ) + + fig.canvas.mpl_connect("key_press_event", on_keypress) + return fig, g + + def __update_visualisation(self) -> None: + self.g.set_offsets( + np.c_[ + _project(self.boids.x, self.boids.z, self.range / 2), + _project(self.boids.y, self.boids.z, self.range / 2), + ] + ) + self.g.set_sizes(_size(self.boids.z)) + self.g.set_facecolor(self.boids.c.map(_cmap)) + self.fig.canvas.draw() + self.fig.canvas.flush_events() - def __init__(self, N: int, range: float, vision: float, exclusion: float, speed: float) -> None: - super().__init__(no.LinearTimeline(0.0, 0.01), no.MonteCarlo.nondeterministic_stream) - - self.N = N - self.range = range - self.vision = vision - self.exclusion = exclusion - self.speed = speed - - # unconstrained 3d space - self.domain = no.Space(np.zeros(3), np.full(3, self.range), edge=no.Edge.UNBOUNDED) - - self.N_predators = 1 - - # initially in [0,1]^dim - self.boids = pd.DataFrame( - index=pd.Index(name="id", data=no.df.unique_index(self.N)), - data={ - "x": self.mc.ustream(N) * self.range, - "y": self.mc.ustream(N) * self.range, - "z": self.mc.ustream(N) * self.range, - "vx": self.mc.ustream(N) - 0.5, - "vy": self.mc.ustream(N) - 0.5, - "vz": self.mc.ustream(N) - 0.5, - "c": 0 - } - ) - - self.__normalise() - - self.fig, self.g = self.__init_visualisation() - - self.paused = False - - # suppress divsion by zero warnings - np.seterr(divide='ignore') - - def step(self) -> None: - if self.paused: - sleep(0.2) - self.__update_visualisation() - return - - d2, (dx, dy, dz) = self.domain.dists2((self.boids.x, self.boids.y, self.boids.z)) - np.fill_diagonal(d2, np.inf) # no self-influence - - # separate - too_close = d2 < self.exclusion**2 - self.__separate(too_close, d2, dx, dy, dz) - - # avoid predator - in_range = d2 < self.vision ** 2 - self.__avoid(in_range, d2, dx, dy, dz) - - # mask those that needed to separate - in_range = np.logical_and(in_range, ~too_close).astype(float) - - self.__cohere(in_range, dx, dy, dz) - self.__align(in_range) - - # favour returning to the origin - self.__revert() - - self.__normalise() - - # set colours - self.boids.c = 0 - self.boids.loc[in_range[0:self.N_predators].sum(axis=0) != 0, "c"] = 1 - self.boids.loc[too_close[0:self.N_predators].sum(axis=0) != 0, "c"] = 2 - self.boids.loc[0:self.N_predators - 1, "c"] = 3 - - (self.boids.x, self.boids.y, self.boids.z), (self.boids.vx, self.boids.vy, self.boids.vz) = self.domain.move( - (self.boids.x, self.boids.y, self.boids.z), - (self.boids.vx, self.boids.vy, self.boids.vz), - self.timeline.dt, - ungroup=True - ) - - sleep(0.001) - self.__update_visualisation() - - def __align(self, in_range: np.ndarray) -> None: - weights = 1.0 / np.sum(in_range, axis=0) - weights[weights == np.inf] = 0.0 - - mean_vx = (in_range * self.boids.vx.values) @ weights - mean_vy = (in_range * self.boids.vy.values) @ weights - mean_vz = (in_range * self.boids.vz.values) @ weights - - self.boids.vx += mean_vx * Boids3d.ALIGN_COEFF - self.boids.vy += mean_vy * Boids3d.ALIGN_COEFF - self.boids.vz += mean_vz * Boids3d.ALIGN_COEFF - - def __cohere(self, in_range: np.ndarray, dx: np.ndarray, dy: np.ndarray, dz: np.ndarray) -> None: - weights = 1.0 / np.sum(in_range, axis=0) - weights[weights == np.inf] = 0.0 - x = (in_range * dx) @ weights - y = (in_range * dy) @ weights - z = (in_range * dz) @ weights - - self.boids.vx += x * Boids3d.COHERE_COEFF - self.boids.vy += y * Boids3d.COHERE_COEFF - self.boids.vz += z * Boids3d.COHERE_COEFF - - def __separate(self, in_range: np.ndarray, 
d2: np.ndarray, dx: np.ndarray, dy: np.ndarray, dz: np.ndarray) -> None: - # TODO clip d2? - # impact on v is proportional to 1/f - f = Boids3d.SEPARATE_COEFF / d2 * in_range - self.boids.vx += (f * dx).sum(axis=0) - self.boids.vy += (f * dy).sum(axis=0) - self.boids.vz += (f * dz).sum(axis=0) - - def __avoid(self, in_range: np.ndarray, d2: np.ndarray, dx: np.ndarray, dy: np.ndarray, dz: np.ndarray) -> None: - f = Boids3d.AVOID_COEFF / d2[0:self.N_predators, :] * in_range[0:self.N_predators, :] - self.boids.vx += (f * dx[0:self.N_predators, :]).sum(axis=0) - self.boids.vy += (f * dy[0:self.N_predators, :]).sum(axis=0) - self.boids.vz += (f * dz[0:self.N_predators, :]).sum(axis=0) - - def __revert(self) -> None: - """Return to the origin""" - self.boids.vx -= (self.boids.x - self.range / 2) * Boids3d.REVERT_COEFF - self.boids.vy -= (self.boids.y - self.range / 2) * Boids3d.REVERT_COEFF - self.boids.vz -= (self.boids.z - self.range / 2) * Boids3d.REVERT_COEFF - - def __normalise(self) -> None: - # normalise speed - norm = np.clip(np.sqrt(self.boids.vx ** 2 + self.boids.vy ** 2 + self.boids.vz ** 2), a_min = 0.00001, a_max=None) - self.boids.vx *= self.speed / norm - self.boids.vy *= self.speed / norm - self.boids.vz *= self.speed / norm - - # predators are faster - self.boids.loc[0:self.N_predators - 1, "vx"] *= 1.5 - self.boids.loc[0:self.N_predators - 1, "vy"] *= 1.5 - self.boids.loc[0:self.N_predators - 1, "vz"] *= 1.5 - - def __init_visualisation(self) -> tuple[Any, Any]: - plt.ion() - - fig = plt.figure(figsize=(10, 10)) - fig.suptitle("[p to pause, q to quit]", y=0.05, x=0.1) - ax = plt.axes() # projection="3d") - - g = ax.scatter(_project(self.boids.x, self.boids.z, self.range / 2), - _project(self.boids.y, self.boids.z, self.range / 2), - c=self.boids.c.map(_cmap), s=_size(self.boids.z)) - - ax.set_xlim(0.0, self.range) - ax.set_ylim(0.0, self.range) - plt.axis("off") - plt.tight_layout() - fig.canvas.flush_events() - - def on_keypress(event): - if event.key == "p": - self.paused = not self.paused - elif event.key == "q": - self.halt() - else: - no.log("%s doesnt do anything. p to pause/resume, q to quit" % event.key) - - fig.canvas.mpl_connect('key_press_event', on_keypress) - return fig, g - - def __update_visualisation(self) -> None: - self.g.set_offsets(np.c_[_project(self.boids.x, self.boids.z, self.range / 2), - _project(self.boids.y, self.boids.z, self.range / 2)]) - self.g.set_sizes(_size(self.boids.z)) - self.g.set_facecolor(self.boids.c.map(_cmap)) - self.fig.canvas.draw() - self.fig.canvas.flush_events() def _project(a: np.ndarray, z: np.ndarray, c: float) -> np.ndarray: - d = 1.0 - # centre, adjust, recentre - return (a - c) * d / (1 + z) + c + d = 1.0 + # centre, adjust, recentre + return (a - c) * d / (1 + z) + c + def _size(z: np.ndarray) -> np.ndarray: - return 5.0 / (0.5 + z) # np.clip(.5 + z, a_min=0.1, a_max=None) \ No newline at end of file + return 5.0 / (0.5 + z) # np.clip(.5 + z, a_min=0.1, a_max=None) diff --git a/examples/boids/run.py b/examples/boids/run.py index ece3ad65..253422a3 100644 --- a/examples/boids/run.py +++ b/examples/boids/run.py @@ -1,24 +1,25 @@ import sys + import neworder as no # perfomance can be improved by **reducing** the number of threads numpy uses # e.g. 
set OPENBLAS_NUM_THREADS=2 # not sure why but might be contention with the graphical rendering -N = 1000 # number of boids -range = 1.0 # extent of the domain -vision = 0.2 # distance boids "see" -exclusion = 0.05 # distance collision avoidance kicks in +N = 1000 # number of boids +range = 1.0 # extent of the domain +vision = 0.2 # distance boids "see" +exclusion = 0.05 # distance collision avoidance kicks in speed = 1.0 if len(sys.argv) != 2 or sys.argv[1] not in ["2d", "3d"]: - print("usage: python examples/boids/run.py 2d|3d") - exit(1) + print("usage: python examples/boids/run.py 2d|3d") + exit(1) if sys.argv[1] == "2d": - from boids2d import Boids2d as Boids + from boids2d import Boids2d as Boids else: - from boids3d import Boids3d as Boids + from boids3d import Boids3d as Boids m = Boids(N, range, vision, exclusion, speed) diff --git a/examples/chapter1/model.py b/examples/chapter1/model.py index 253c71db..8c5213a1 100644 --- a/examples/chapter1/model.py +++ b/examples/chapter1/model.py @@ -4,9 +4,10 @@ See https://www.microsimulationandpopulationdynamics.com/ """ import numpy as np -import neworder from person import People +import neworder + # neworder.verbose() # uncomment for detailed output # "An arbitrarily selected value, chosen to produce a life expectancy of about 70 years." @@ -20,4 +21,4 @@ # now we can sample the population generated by the model to see the proportion of deaths at (arbitrarily) 10 year intervals for age in np.linspace(10.0, 100.0, 10): - neworder.log("Age %.0f survival rate = %.1f%%" % (age, model.alive(age) * 100.0)) + neworder.log("Age %.0f survival rate = %.1f%%" % (age, model.alive(age) * 100.0)) diff --git a/examples/chapter1/person.py b/examples/chapter1/person.py index 551ef592..c4b30f5a 100644 --- a/examples/chapter1/person.py +++ b/examples/chapter1/person.py @@ -1,71 +1,88 @@ # the framework must be explicitly imported -import neworder import numpy as np +import neworder + + # !person! 
-class Person():
-  """
-  MODGEN equivalent: actor Person {...}
-  Represents a single individual
-  """
-  def __init__(self, mortality_hazard: float) -> None:
-    """ MODGEN equivalent: Person::Start() """
-    self.alive = True
-    # MODGEN would automatically create time and age, though they are not needed for this simple example
-    self.mortality_hazard = mortality_hazard
-    self.time_mortality = neworder.time.NEVER # to be computed later
-
-  def finish(self) -> None:
-    """ MODGEN equivalent: Person::Finish() """
-    # nothing required here
-
-  def state(self, t: float) -> bool:
-    """ Returns the person's state (alive/dead) at age t """
-    return self.time_mortality > t
-
-  def time_mortality_event(self, mc: neworder.MonteCarlo) -> float:
-    """ MODGEN equivalent: TIME Person::timeMortalityEvent() """
-    if neworder.time.isnever(self.time_mortality):
-      self.time_mortality = mc.stopping(self.mortality_hazard, 1)[0]
-    return self.time_mortality
-
-  def mortality_event(self) -> None:
-    """ MODGEN equivalent: void Person::MortalityEvent() """
-    # NB this is not used in this implementation
-    self.alive = False
+class Person:
+    """
+    MODGEN equivalent: actor Person {...}
+    Represents a single individual
+    """
+
+    def __init__(self, mortality_hazard: float) -> None:
+        """MODGEN equivalent: Person::Start()"""
+        self.alive = True
+        # MODGEN would automatically create time and age, though they are not needed for this simple example
+        self.mortality_hazard = mortality_hazard
+        self.time_mortality = neworder.time.NEVER  # to be computed later
+
+    def finish(self) -> None:
+        """MODGEN equivalent: Person::Finish()"""
+        # nothing required here
+
+    def state(self, t: float) -> bool:
+        """Returns the person's state (alive/dead) at age t"""
+        return self.time_mortality > t
+
+    def time_mortality_event(self, mc: neworder.MonteCarlo) -> float:
+        """MODGEN equivalent: TIME Person::timeMortalityEvent()"""
+        if neworder.time.isnever(self.time_mortality):
+            self.time_mortality = mc.stopping(self.mortality_hazard, 1)[0]
+        return self.time_mortality
+
+    def mortality_event(self) -> None:
+        """MODGEN equivalent: void Person::MortalityEvent()"""
+        # NB this is not used in this implementation
+        self.alive = False
+
+
 # !person!
+
+
 # !constructor!
 class People(neworder.Model):
-  """ A model containing an aggregration of Person objects """
-  def __init__(self, mortality_hazard: float, n: int) -> None:
+    """A model containing an aggregation of Person objects"""

-    # initialise base model with a nondeterministic seed results will vary (slightly)
-    super().__init__(neworder.NoTimeline(), neworder.MonteCarlo.nondeterministic_stream)
+    def __init__(self, mortality_hazard: float, n: int) -> None:
+        # initialise base model with a nondeterministic seed - results will vary (slightly)
+        super().__init__(
+            neworder.NoTimeline(), neworder.MonteCarlo.nondeterministic_stream
+        )

-    # initialise population
-    self.population = [Person(mortality_hazard) for _ in range(n)]
-    neworder.log("created %d individuals" % n)
-# !constructor!
+        # initialise population
+        self.population = [Person(mortality_hazard) for _ in range(n)]
+        neworder.log("created %d individuals" % n)
-# !step!
-  def step(self) -> None:
-    # sample each person's age at death.
-    # (this is not an efficient implementation when everyone has the same hazard rate)
-    [p.time_mortality_event(self.mc) for p in self.population]
-# !step!
-
-# !finalise!
- def finalise(self) -> None: - # compute mean sampled life expectancy against theoretical - sample_le = sum([p.time_mortality for p in self.population]) / len(self.population) - actual_le = 1.0 / self.population[0].mortality_hazard - error = sample_le - actual_le - neworder.log("Life expectancy = %.2f years (sampling error=%.2f years)" % (sample_le, error)) -# !finalise! + # !constructor! -# !alive! - def alive(self, t: float) -> float: - return np.mean([p.state(t) for p in self.population]) -# !alive! + # !step! + def step(self) -> None: + # sample each person's age at death. + # (this is not an efficient implementation when everyone has the same hazard rate) + [p.time_mortality_event(self.mc) for p in self.population] + + # !step! + # !finalise! + def finalise(self) -> None: + # compute mean sampled life expectancy against theoretical + sample_le = sum([p.time_mortality for p in self.population]) / len( + self.population + ) + actual_le = 1.0 / self.population[0].mortality_hazard + error = sample_le - actual_le + neworder.log( + "Life expectancy = %.2f years (sampling error=%.2f years)" + % (sample_le, error) + ) + + # !finalise! + + # !alive! + def alive(self, t: float) -> float: + return np.mean([p.state(t) for p in self.population]) + + +# !alive! diff --git a/examples/competing/model.py b/examples/competing/model.py index 9cd8f8e3..7b5145e5 100644 --- a/examples/competing/model.py +++ b/examples/competing/model.py @@ -1,17 +1,19 @@ """ Competing risks - fertility & mortality """ -import neworder # model implementation from people import People + # separate visualisation code from visualise import plot +import neworder + # neworder.verbose() # create model # data are for white British women in a London Borough at 1 year time resolution -dt = 1.0 # years +dt = 1.0 # years fertility_hazard_file = "examples/competing/fertility-wbi.csv" mortality_hazard_file = "examples/competing/mortality-wbi.csv" population_size = 100000 @@ -22,4 +24,3 @@ # visualise results plot(pop) - diff --git a/examples/competing/people.py b/examples/competing/people.py index 6a969a17..3c1caa80 100644 --- a/examples/competing/people.py +++ b/examples/competing/people.py @@ -1,45 +1,58 @@ - import numpy as np import pandas as pd # type: ignore + import neworder as no class People(no.Model): - """ A simple aggregration of Persons each represented as a row in a data frame """ - def __init__(self, dt: float, fertility_hazard_file: str, mortality_hazard_file: str, n: int) -> None: - - super().__init__(no.NoTimeline(), no.MonteCarlo.deterministic_identical_stream) - - self.dt = dt # time resolution of fertility/mortality data - - self.fertility_hazard = pd.read_csv(fertility_hazard_file) - self.mortality_hazard = pd.read_csv(mortality_hazard_file) - - # store the largest age we have a rate for - self.max_rate_age = int(max(self.mortality_hazard.DC1117EW_C_AGE) - 1) - - # initialise cohort - self.population = pd.DataFrame(index=no.df.unique_index(n), - data={"parity": 0, - "time_of_death": no.time.FAR_FUTURE}) - - def step(self) -> None: - # sample deaths - self.population["time_of_death"] = self.mc.first_arrival(self.mortality_hazard.Rate.values, self.dt, len(self.population)) - - # sample (multiple) births with events at least 9 months apart - births = self.mc.arrivals(self.fertility_hazard.Rate.values, self.dt, len(self.population), 0.75) - - # the number of columns is governed by the maximum number of arrivals in the births data - for i in range(births.shape[1]): - col = "time_of_baby_" + str(i + 1) - 
self.population[col] = births[:, i]
-      # remove births that would have occured after death
-      self.population.loc[self.population[col] > self.population.time_of_death, col] = no.time.NEVER
-      self.population.parity = self.population.parity + ~no.time.isnever(self.population[col].values)
-
-  def finalise(self) -> None:
-    # compute means
-    no.log("birth rate = %f" % np.mean(self.population.parity))
-    no.log("percentage mothers = %f" % (100.0 * np.mean(self.population.parity > 0)))
-    no.log("life expexctancy = %f" % np.mean(self.population.time_of_death))
+    """A simple aggregation of Persons each represented as a row in a data frame"""
+
+    def __init__(
+        self, dt: float, fertility_hazard_file: str, mortality_hazard_file: str, n: int
+    ) -> None:
+        super().__init__(no.NoTimeline(), no.MonteCarlo.deterministic_identical_stream)
+
+        self.dt = dt  # time resolution of fertility/mortality data
+
+        self.fertility_hazard = pd.read_csv(fertility_hazard_file)
+        self.mortality_hazard = pd.read_csv(mortality_hazard_file)
+
+        # store the largest age we have a rate for
+        self.max_rate_age = int(max(self.mortality_hazard.DC1117EW_C_AGE) - 1)
+
+        # initialise cohort
+        self.population = pd.DataFrame(
+            index=no.df.unique_index(n),
+            data={"parity": 0, "time_of_death": no.time.FAR_FUTURE},
+        )
+
+    def step(self) -> None:
+        # sample deaths
+        self.population["time_of_death"] = self.mc.first_arrival(
+            self.mortality_hazard.Rate.values, self.dt, len(self.population)
+        )
+
+        # sample (multiple) births with events at least 9 months apart
+        births = self.mc.arrivals(
+            self.fertility_hazard.Rate.values, self.dt, len(self.population), 0.75
+        )
+
+        # the number of columns is governed by the maximum number of arrivals in the births data
+        for i in range(births.shape[1]):
+            col = "time_of_baby_" + str(i + 1)
+            self.population[col] = births[:, i]
+            # remove births that would have occurred after death
+            self.population.loc[
+                self.population[col] > self.population.time_of_death, col
+            ] = no.time.NEVER
+            self.population.parity = self.population.parity + ~no.time.isnever(
+                self.population[col].values
+            )
+
+    def finalise(self) -> None:
+        # compute means
+        no.log("birth rate = %f" % np.mean(self.population.parity))
+        no.log(
+            "percentage mothers = %f" % (100.0 * np.mean(self.population.parity > 0))
+        )
+        no.log("life expectancy = %f" % np.mean(self.population.time_of_death))
diff --git a/examples/competing/visualise.py b/examples/competing/visualise.py
index fca9a9fd..9dd55903 100644
--- a/examples/competing/visualise.py
+++ b/examples/competing/visualise.py
@@ -1,36 +1,40 @@
 import numpy as np
-from matplotlib import pyplot as plt # type: ignore
-
+from matplotlib import pyplot as plt  # type: ignore
 from people import People

+
 def plot(model: People) -> None:
-  plot_age(model)
-  plt.show()
-  plot_parity(model)
-  plt.show()
+    plot_age(model)
+    plt.show()
+    plot_parity(model)
+    plt.show()


 def plot_age(model: People) -> None:
-  bins = np.arange(model.max_rate_age)
+    bins = np.arange(model.max_rate_age)

-  b = [model.population.time_of_baby_1,
-       model.population.time_of_baby_2,
-       model.population.time_of_baby_3,
-       model.population.time_of_baby_4,
-       model.population.time_of_baby_5]
-  plt.hist(b, bins, stacked=True)
-  plt.hist(model.population.time_of_death, bins, color='black')
-  plt.title("Competing risks of childbirth and death")
-  plt.legend(["1st birth", "2nd birth", "3rd birth", "4th birth", "5th birth", "Death"])
-  plt.xlabel("Age (y)")
-  plt.ylabel("Frequency")
-  # 
plt.savefig("./docs/examples/img/competing_hist_100k.png", dpi=80) + b = [ + model.population.time_of_baby_1, + model.population.time_of_baby_2, + model.population.time_of_baby_3, + model.population.time_of_baby_4, + model.population.time_of_baby_5, + ] + plt.hist(b, bins, stacked=True) + plt.hist(model.population.time_of_death, bins, color="black") + plt.title("Competing risks of childbirth and death") + plt.legend( + ["1st birth", "2nd birth", "3rd birth", "4th birth", "5th birth", "Death"] + ) + plt.xlabel("Age (y)") + plt.ylabel("Frequency") + # plt.savefig("./docs/examples/img/competing_hist_100k.png", dpi=80) def plot_parity(model: People) -> None: - bins = np.arange(model.population.parity.max()) - 0.25 - plt.hist(model.population.parity, bins, width=0.5) - plt.title("Births during lifetime") - plt.xlabel("Number of children") - plt.ylabel("Frequency") - # plt.savefig("./docs/examples/img/competing_births_100k.png", dpi=80) + bins = np.arange(model.population.parity.max()) - 0.25 + plt.hist(model.population.parity, bins, width=0.5) + plt.title("Births during lifetime") + plt.xlabel("Number of children") + plt.ylabel("Frequency") + # plt.savefig("./docs/examples/img/competing_births_100k.png", dpi=80) diff --git a/examples/conway/conway.py b/examples/conway/conway.py index 838ca400..03e08838 100644 --- a/examples/conway/conway.py +++ b/examples/conway/conway.py @@ -1,65 +1,83 @@ from __future__ import annotations -import numpy as np -import neworder as no + import matplotlib.pyplot as plt # type: ignore -from matplotlib.image import AxesImage # type: ignore +import numpy as np from matplotlib import colors +from matplotlib.image import AxesImage # type: ignore +import neworder as no -class Conway(no.Model): - - __glider = np.array([[0, 0, 1], [1, 0, 1], [0, 1, 1]], dtype=int) - - def __init__(self, nx: int, ny: int, n: int, edge: no.Edge=no.Edge.WRAP) -> None: - super().__init__(no.LinearTimeline(0, 1), no.MonteCarlo.nondeterministic_stream) - - # create n automata at regular positions - init_state = np.zeros((nx * ny)) - init_state[::2] = 1 - init_state[::7] = 1 - - self.domain = no.StateGrid(init_state.reshape(ny, nx), edge=edge) - - #self.domain.state[20:23, 20:23] = Conway.__glider - - self.fig, self.g = self.__init_visualisation() - - # !step! - def step(self) -> None: - n = self.domain.count_neighbours(lambda x: x > 0) - - deaths = np.logical_or(n < 2, n > 3) - births = n == 3 - - self.domain.state = self.domain.state * ~deaths + births - - self.__update_visualisation() - # !step! 
-
-  def check(self) -> bool:
-    # randomly place a glider (not across edge)
-    if self.timeline.index == 0:
-      x = self.mc.raw() % (self.domain.state.shape[0] - 2)
-      y = self.mc.raw() % (self.domain.state.shape[1] - 2)
-      self.domain.state[x:x+3, y:y+3] = np.rot90(Conway.__glider, self.mc.raw() % 4)
-    return True
-
-  def __init_visualisation(self) -> tuple[plt.Figure, AxesImage]:
-    plt.ion()
-    cmap = colors.ListedColormap(['black', 'white', 'purple', 'blue', 'green', 'yellow', 'orange', 'red', 'brown'])
-    fig = plt.figure(constrained_layout=True, figsize=(8, 8))
-    g = plt.imshow(self.domain.state, cmap=cmap, vmax=9)
-    plt.axis("off")
-
-    fig.canvas.flush_events()
-    fig.canvas.mpl_connect('key_press_event', lambda event: self.halt() if event.key == "q" else None)
-
-    return fig, g
-
-  def __update_visualisation(self) -> None:
-    self.g.set_data(self.domain.state)
-    # plt.savefig("/tmp/conway%04d.png" % self.timeline.index, dpi=80)
-    # if self.timeline.index > 100:
-    #   self.halt()
-    self.fig.canvas.flush_events()
+class Conway(no.Model):
+    __glider = np.array([[0, 0, 1], [1, 0, 1], [0, 1, 1]], dtype=int)
+
+    def __init__(self, nx: int, ny: int, edge: no.Edge = no.Edge.WRAP) -> None:
+        super().__init__(no.LinearTimeline(0, 1), no.MonteCarlo.nondeterministic_stream)
+
+        # create automata at regular positions
+        init_state = np.zeros((nx * ny))
+        init_state[::2] = 1
+        init_state[::7] = 1
+
+        self.domain = no.StateGrid(init_state.reshape(ny, nx), edge=edge)
+
+        # self.domain.state[20:23, 20:23] = Conway.__glider
+
+        self.fig, self.g = self.__init_visualisation()
+
+    # !step!
+    def step(self) -> None:
+        n = self.domain.count_neighbours(lambda x: x > 0)
+
+        deaths = np.logical_or(n < 2, n > 3)
+        births = n == 3
+
+        self.domain.state = self.domain.state * ~deaths + births
+
+        self.__update_visualisation()
+
+    # !step!
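The step() above is the standard Game of Life update: cells with fewer than 2 or more than 3 live neighbours die, and empty cells with exactly 3 neighbours are born. A self-contained sketch of the same rule on a plain numpy array, assuming wrapped (toroidal) edges like no.Edge.WRAP - StateGrid.count_neighbours performs the equivalent neighbour count internally:

    import numpy as np
    from scipy.signal import convolve2d

    KERNEL = np.array([[1, 1, 1], [1, 0, 1], [1, 1, 1]])

    def life_step(state: np.ndarray) -> np.ndarray:
        # count live neighbours on a torus (cf. no.Edge.WRAP)
        n = convolve2d(state, KERNEL, mode="same", boundary="wrap")
        deaths = (n < 2) | (n > 3)
        births = n == 3
        # binary form of the update; the model's own version adds births to
        # the surviving state, letting values above 1 accumulate for display
        return ((state.astype(bool) & ~deaths) | births).astype(int)
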
+ + def check(self) -> bool: + # randomly place a glider (not across edge) + if self.timeline.index == 0: + x = self.mc.raw() % (self.domain.state.shape[0] - 2) + y = self.mc.raw() % (self.domain.state.shape[1] - 2) + self.domain.state[x : x + 3, y : y + 3] = np.rot90( + Conway.__glider, self.mc.raw() % 4 + ) + return True + + def __init_visualisation(self) -> tuple[plt.Figure, AxesImage]: + plt.ion() + cmap = colors.ListedColormap( + [ + "black", + "white", + "purple", + "blue", + "green", + "yellow", + "orange", + "red", + "brown", + ] + ) + fig = plt.figure(constrained_layout=True, figsize=(8, 8)) + g = plt.imshow(self.domain.state, cmap=cmap, vmax=9) + plt.axis("off") + + fig.canvas.flush_events() + fig.canvas.mpl_connect( + "key_press_event", lambda event: self.halt() if event.key == "q" else None + ) + + return fig, g + + def __update_visualisation(self) -> None: + self.g.set_data(self.domain.state) + # plt.savefig("/tmp/conway%04d.png" % self.timeline.index, dpi=80) + # if self.timeline.index > 100: + # self.halt() + + self.fig.canvas.flush_events() diff --git a/examples/conway/run_model.py b/examples/conway/run_model.py index 04838a02..709ff748 100644 --- a/examples/conway/run_model.py +++ b/examples/conway/run_model.py @@ -1,5 +1,5 @@ - from conway import Conway + import neworder as no # size of domain @@ -8,9 +8,7 @@ # saturation (proportion initially alive) sat = 0.36 -n = int(nx * ny * sat) - # edges wrap - try with no.Edge.CONSTRAIN -m = Conway(nx, ny, n, no.Edge.WRAP) +m = Conway(nx, ny, no.Edge.WRAP) no.run(m) diff --git a/examples/daisyworld/daisyworld.py b/examples/daisyworld/daisyworld.py index b694f0aa..e3cd7153 100644 --- a/examples/daisyworld/daisyworld.py +++ b/examples/daisyworld/daisyworld.py @@ -1,131 +1,162 @@ - -from matplotlib.image import AxesImage # type: ignore -import neworder as no -import numpy as np import matplotlib.pyplot as plt # type: ignore +import numpy as np from matplotlib import colors +from matplotlib.image import AxesImage # type: ignore from scipy import signal # type: ignore +import neworder as no + solar_luminosity = 0.8 class DaisyWorld(no.Model): - - EMPTY = 0 - WHITE_DAISY = 1 - BLACK_DAISY = 2 - - MAX_AGE = 25 - - DIFF_KERNEL = np.array([ - [1.0 / 16, 1.0 / 16, 1.0 / 16], - [1.0 / 16, 1.0 / 2, 1.0 / 16], - [1.0 / 16, 1.0 / 16, 1.0 / 16] - ]) - - def __init__(self, gridsize: tuple[int, int], pct_white: float, pct_black: float) -> None: - super().__init__(no.LinearTimeline(0, 1), no.MonteCarlo.deterministic_independent_stream) - - p = [pct_white, pct_black, 1 - pct_white - pct_black] - init_pop = self.mc.sample(np.prod(gridsize), p).reshape(gridsize) - - self.domain = no.StateGrid(init_pop, edge=no.Edge.WRAP) - self.age = (self.mc.ustream(self.domain.state.size) * DaisyWorld.MAX_AGE).astype(int).reshape(self.domain.state.shape) - self.temperature = np.zeros(self.domain.state.shape) - - self.albedo = np.array([0.4, 0.75, 0.25]) - - self.temperature = self.__calc_local_heating() - self.__diffuse() - - print(self.domain.state) - # print(self.age) - # print(self.temperature) - - self.fig, self.img = self.__init_visualisation() - - def step(self) -> None: - self.age += 1 - - # update temperature - self.temperature = 0.5 * (self.temperature + self.__calc_local_heating()) - self.__diffuse() - no.log(f"mean temp = {np.mean(self.temperature)}") - - # update daisies - self.age = np.where( - np.logical_or( - self.age >= DaisyWorld.MAX_AGE, - self.domain.state == DaisyWorld.EMPTY), - 0, self.age) - # kill old - self.domain.state = np.where(self.age == 
0, DaisyWorld.EMPTY, self.domain.state) - - # spawn new - p_seed = np.clip(0.1457 * self.temperature - 0.0032 * self.temperature ** 2 - 0.6443, 0, 1) - p_seed_white = np.where(self.domain.state == DaisyWorld.WHITE_DAISY, p_seed, 0) - p_seed_black = np.where(self.domain.state == DaisyWorld.BLACK_DAISY, p_seed, 0) - - d = [self.timeline.index % 3 - 1, self.timeline.index // 3 % 3 - 1] - - new_white = np.logical_and(np.roll(self.mc.hazard(p_seed_white), d, axis=[0, 1]), self.domain.state == DaisyWorld.EMPTY) - self.domain.state = np.where(new_white, DaisyWorld.WHITE_DAISY, self.domain.state) - self.age = np.where(new_white, 0, self.age) - - new_black = np.logical_and(np.roll(self.mc.hazard(p_seed_black), d, axis=[0, 1]), self.domain.state == DaisyWorld.EMPTY) - self.domain.state = np.where(new_black, DaisyWorld.BLACK_DAISY, self.domain.state) - self.age = np.where(new_black, 0, self.age) - - # self.halt() - # spawners = self.mc.hazard(p_seed_white) - - self.__update_visualisation() - - # sleep(0.1) - - if self.timeline.index > 3000: - self.halt() - - def __calc_local_heating(self) -> np.ndarray[np.float64, np.dtype[np.float64]]: - # local_heating = 0 - - # get absorbed luminosity from state - def fs(state: np.ndarray[np.int64, np.dtype[np.int64]]) -> np.ndarray[np.float64, np.dtype[np.float64]]: - return (1.0 - self.albedo[state]) * solar_luminosity - abs_lum = fs(self.domain.state) - - # get local heating from absorbed luminosity - def fl(lum: np.ndarray[np.float64, np.dtype[np.float64]]) -> np.ndarray[np.float64, np.dtype[np.float64]]: - return 72.0 * np.log(lum) + 80.0 - return fl(abs_lum) - - def __diffuse(self) -> None: - padded = np.pad(self.temperature, pad_width=1, mode="wrap") - self.temperature = signal.convolve(padded, DaisyWorld.DIFF_KERNEL, mode="same", method="direct")[1:-1, 1:-1] - - def __init_visualisation(self) -> tuple[plt.Figure, AxesImage]: - - # TODO copy wolf-sheep - plt.ion() - - cmap = colors.ListedColormap(['blue', 'white', 'black']) - - fig = plt.figure(constrained_layout=True, figsize=(6, 6)) - img = plt.imshow(self.domain.state.T, cmap=cmap) - plt.axis('off') - fig.canvas.mpl_connect('key_press_event', lambda event: self.halt() if event.key == "q" else None) - fig.canvas.flush_events() - - return fig, img - - def __update_visualisation(self) -> None: - self.img.set_array(self.domain.state.T) - # plt.savefig("/tmp/daisyworld%04d.png" % self.timeline.index, dpi=80) - self.fig.canvas.flush_events() + EMPTY = 0 + WHITE_DAISY = 1 + BLACK_DAISY = 2 + + MAX_AGE = 25 + + DIFF_KERNEL = np.array( + [ + [1.0 / 16, 1.0 / 16, 1.0 / 16], + [1.0 / 16, 1.0 / 2, 1.0 / 16], + [1.0 / 16, 1.0 / 16, 1.0 / 16], + ] + ) + + def __init__( + self, gridsize: tuple[int, int], pct_white: float, pct_black: float + ) -> None: + super().__init__( + no.LinearTimeline(0, 1), no.MonteCarlo.deterministic_independent_stream + ) + + p = [pct_white, pct_black, 1 - pct_white - pct_black] + init_pop = self.mc.sample(np.prod(gridsize), p).reshape(gridsize) + + self.domain = no.StateGrid(init_pop, edge=no.Edge.WRAP) + self.age = ( + (self.mc.ustream(self.domain.state.size) * DaisyWorld.MAX_AGE) + .astype(int) + .reshape(self.domain.state.shape) + ) + self.temperature = np.zeros(self.domain.state.shape) + + self.albedo = np.array([0.4, 0.75, 0.25]) + + self.temperature = self.__calc_local_heating() + self.__diffuse() + + print(self.domain.state) + # print(self.age) + # print(self.temperature) + + self.fig, self.img = self.__init_visualisation() + + def step(self) -> None: + self.age += 1 + + # update 
temperature + self.temperature = 0.5 * (self.temperature + self.__calc_local_heating()) + self.__diffuse() + no.log(f"mean temp = {np.mean(self.temperature)}") + + # update daisies + self.age = np.where( + np.logical_or( + self.age >= DaisyWorld.MAX_AGE, self.domain.state == DaisyWorld.EMPTY + ), + 0, + self.age, + ) + # kill old + self.domain.state = np.where(self.age == 0, DaisyWorld.EMPTY, self.domain.state) + + # spawn new + p_seed = np.clip( + 0.1457 * self.temperature - 0.0032 * self.temperature**2 - 0.6443, 0, 1 + ) + p_seed_white = np.where(self.domain.state == DaisyWorld.WHITE_DAISY, p_seed, 0) + p_seed_black = np.where(self.domain.state == DaisyWorld.BLACK_DAISY, p_seed, 0) + + d = [self.timeline.index % 3 - 1, self.timeline.index // 3 % 3 - 1] + + new_white = np.logical_and( + np.roll(self.mc.hazard(p_seed_white), d, axis=[0, 1]), + self.domain.state == DaisyWorld.EMPTY, + ) + self.domain.state = np.where( + new_white, DaisyWorld.WHITE_DAISY, self.domain.state + ) + self.age = np.where(new_white, 0, self.age) + + new_black = np.logical_and( + np.roll(self.mc.hazard(p_seed_black), d, axis=[0, 1]), + self.domain.state == DaisyWorld.EMPTY, + ) + self.domain.state = np.where( + new_black, DaisyWorld.BLACK_DAISY, self.domain.state + ) + self.age = np.where(new_black, 0, self.age) + + # self.halt() + # spawners = self.mc.hazard(p_seed_white) + + self.__update_visualisation() + + # sleep(0.1) + + if self.timeline.index > 3000: + self.halt() + + def __calc_local_heating(self) -> np.ndarray[np.float64, np.dtype[np.float64]]: + # local_heating = 0 + + # get absorbed luminosity from state + def fs( + state: np.ndarray[np.int64, np.dtype[np.int64]] + ) -> np.ndarray[np.float64, np.dtype[np.float64]]: + return (1.0 - self.albedo[state]) * solar_luminosity + + abs_lum = fs(self.domain.state) + + # get local heating from absorbed luminosity + def fl( + lum: np.ndarray[np.float64, np.dtype[np.float64]] + ) -> np.ndarray[np.float64, np.dtype[np.float64]]: + return 72.0 * np.log(lum) + 80.0 + + return fl(abs_lum) + + def __diffuse(self) -> None: + padded = np.pad(self.temperature, pad_width=1, mode="wrap") + self.temperature = signal.convolve( + padded, DaisyWorld.DIFF_KERNEL, mode="same", method="direct" + )[1:-1, 1:-1] + + def __init_visualisation(self) -> tuple[plt.Figure, AxesImage]: + # TODO copy wolf-sheep + plt.ion() + + cmap = colors.ListedColormap(["blue", "white", "black"]) + + fig = plt.figure(constrained_layout=True, figsize=(6, 6)) + img = plt.imshow(self.domain.state.T, cmap=cmap) + plt.axis("off") + fig.canvas.mpl_connect( + "key_press_event", lambda event: self.halt() if event.key == "q" else None + ) + fig.canvas.flush_events() + + return fig, img + + def __update_visualisation(self) -> None: + self.img.set_array(self.domain.state.T) + # plt.savefig("/tmp/daisyworld%04d.png" % self.timeline.index, dpi=80) + self.fig.canvas.flush_events() if __name__ == "__main__": - m = DaisyWorld((100, 100), 0.25, 0.2) - no.run(m) - # print(m.temperature) - + m = DaisyWorld((100, 100), 0.25, 0.2) + no.run(m) + # print(m.temperature) diff --git a/examples/hello_world/model.py b/examples/hello_world/model.py index 638e8fd6..6dd6b795 100644 --- a/examples/hello_world/model.py +++ b/examples/hello_world/model.py @@ -8,6 +8,7 @@ # !class! 
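For orientation before the hello_world changes below: every example in this patch follows the same neworder.Model lifecycle, overriding step() (mandatory) and optionally modify(), check() and finalise(). A bare-bones sketch using only constructs that appear in these examples - Minimal is an illustrative name, not part of the library:

    import neworder

    class Minimal(neworder.Model):
        def __init__(self) -> None:
            # a single-shot timeline and a reproducibly seeded MC engine
            super().__init__(
                neworder.NoTimeline(), neworder.MonteCarlo.deterministic_identical_stream
            )

        def step(self) -> None:
            # the transitions to apply at each timestep
            neworder.log("stepping")

        def finalise(self) -> None:
            # runs once, after the final timestep
            neworder.log("done")

    neworder.run(Minimal())
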
import pandas as pd + import neworder # uncomment for verbose output @@ -15,89 +16,99 @@ class HelloWorld(neworder.Model): - """ - This model extends the builtin neworder.Model class by providing - implementations of the following methods: - - modify (optional) - - step - - check (optional) - - finalise (optional) - The neworder.run() function will execute the model, looping over - the timeline and calling the methods above - """ -# !class! - - # !constructor! - def __init__(self, n: int, p: float) -> None: - """ - We create a null timeline, corresponding to a single instantaneous - transition, and initialise the base class with this plus a - randomly-seeded Monte-Carlo engine - - NB it is *essential* to initialise the base class. """ - super().__init__(neworder.NoTimeline(), neworder.MonteCarlo.nondeterministic_stream) - - # create a silent population of size n - self.population = pd.DataFrame(index=neworder.df.unique_index(n), - data={"talkative": False}) - self.population.index.name = "id" - - # set the transition probability - self.p_talk = p - # !constructor! - - # def modify(self, rank): - # """ - # For parallel runs only, per-process state modifications can be - # made before the model runs, allowing for e.g. sensitivity analysis - # or splitting datasets across parallel model runs - # This method is optional. - # Arguments: self, rank (MPI process number) - # Returns: NoneType - # """ - # pass - - def __str__(self) -> str: + This model extends the builtin neworder.Model class by providing + implementations of the following methods: + - modify (optional) + - step + - check (optional) + - finalise (optional) + The neworder.run() function will execute the model, looping over + the timeline and calling the methods above """ - Returns a more readable name for verbose logging output, would - otherwise be something like - "<__main__.HelloWorld object at 0x7fe82792da90>" - """ - return self.__class__.__name__ - # !step! - def step(self) -> None: - """ - Transitions to run at each timestep. - This method must be implemented. - Arguments: self - Returns: NoneType - """ - # randomly make some people talkative - self.population.talkative = self.mc.hazard(self.p_talk, len(self.population)).astype(bool) - # !step! + # !class! + + # !constructor! + def __init__(self, n: int, p: float) -> None: + """ + We create a null timeline, corresponding to a single instantaneous + transition, and initialise the base class with this plus a + randomly-seeded Monte-Carlo engine + + NB it is *essential* to initialise the base class. + """ + super().__init__( + neworder.NoTimeline(), neworder.MonteCarlo.nondeterministic_stream + ) + + # create a silent population of size n + self.population = pd.DataFrame( + index=neworder.df.unique_index(n), data={"talkative": False} + ) + self.population.index.name = "id" + + # set the transition probability + self.p_talk = p + + # !constructor! + + # def modify(self, rank): + # """ + # For parallel runs only, per-process state modifications can be + # made before the model runs, allowing for e.g. sensitivity analysis + # or splitting datasets across parallel model runs + # This method is optional. + # Arguments: self, rank (MPI process number) + # Returns: NoneType + # """ + # pass + + def __str__(self) -> str: + """ + Returns a more readable name for verbose logging output, would + otherwise be something like + "<__main__.HelloWorld object at 0x7fe82792da90>" + """ + return self.__class__.__name__ + + # !step! 
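The step() implementation that follows delegates the coin-flipping to mc.hazard, which returns one Bernoulli outcome per individual. A rough numpy equivalent, for intuition only (hazard_sketch is illustrative, not the library call):

    import numpy as np

    def hazard_sketch(p: float, n: int, seed: int = 19937) -> np.ndarray:
        # each of the n individuals independently "fires" with probability p
        return np.random.default_rng(seed).uniform(size=n) < p

    # e.g. roughly 10% of outcomes should be True
    print(hazard_sketch(0.1, 1000).mean())
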
+ def step(self) -> None: + """ + Transitions to run at each timestep. + This method must be implemented. + Arguments: self + Returns: NoneType + """ + # randomly make some people talkative + self.population.talkative = self.mc.hazard( + self.p_talk, len(self.population) + ).astype(bool) + + # !step! + + # !finalise! + def finalise(self) -> None: + """ + This method (optional, if defined) is run at the end of the timeline + Arguments: self + Returns: NoneType + """ + for i, r in self.population.iterrows(): + if r.talkative: + neworder.log(f"Hello from {i}") + + # !finalise! + + # def check(self) -> bool: + # """ + # Custom checks can be made after every timestep during the simulation. + # This method is optional + # Arguments: self + # Returns: bool + # """ + # return True - # !finalise! - def finalise(self) -> None: - """ - This method (optional, if defined) is run at the end of the timeline - Arguments: self - Returns: NoneType - """ - for i, r in self.population.iterrows(): - if r.talkative: - neworder.log(f"Hello from {i}") - # !finalise! - - # def check(self) -> bool: - # """ - # Custom checks can be made after every timestep during the simulation. - # This method is optional - # Arguments: self - # Returns: bool - # """ - # return True # !script! # uncomment for verbose output @@ -112,5 +123,5 @@ def finalise(self) -> None: # run the model and check it worked ok = neworder.run(hello_world) if not ok: - neworder.log("model failed!") + neworder.log("model failed!") # !script! diff --git a/examples/infection/infection.py b/examples/infection/infection.py index ca5d482e..5f8bd936 100644 --- a/examples/infection/infection.py +++ b/examples/infection/infection.py @@ -1,167 +1,228 @@ -from typing import Any from enum import Enum from time import sleep +from typing import Any + +import geopandas as gpd import matplotlib.pyplot as plt import numpy as np - -from neworder.geospatial import GeospatialGraph import osmnx as ox +import pandas as pd from shapely import line_interpolate_point -import geopandas as gpd import neworder as no +from neworder.geospatial import GeospatialGraph + class Status(Enum): - SUSCEPTIBLE = 0 - INFECTED = 1 - IMMUNE = 2 - DEAD = 3 - - @property - def rgba(self) -> tuple[float, float, float, float]: - match self: - case Status.SUSCEPTIBLE: - return (1.0, 1.0, 1.0, 1.0) - case Status.INFECTED: - return (1.0, 0.0, 0.0, 1.0) - case Status.IMMUNE: - return (0.0, 1.0, 0.0, 1.0) - case Status.DEAD: - return (0.0, 0.0, 0.0, 1.0) + SUSCEPTIBLE = 0 + INFECTED = 1 + IMMUNE = 2 + DEAD = 3 + + @property + def rgba(self) -> tuple[float, float, float, float]: + match self: + case Status.SUSCEPTIBLE: + return (1.0, 1.0, 1.0, 1.0) + case Status.INFECTED: + return (1.0, 0.0, 0.0, 1.0) + case Status.IMMUNE: + return (0.0, 1.0, 0.0, 1.0) + case Status.DEAD: + return (0.0, 0.0, 0.0, 1.0) class Infection(no.Model): - def __init__(self, - point: tuple[float, float], - dist: float, - n_agents: int, - n_infected: int, - speed: float, - infection_radius: float, - recovery_time: int, - mortality: float) -> None: - super().__init__(no.LinearTimeline(0.0, 1.0), no.MonteCarlo.deterministic_independent_stream) - # expose the model's MC engine to numpy - self.nprand = no.as_np(self.mc) - # create the spatial domain - self.domain = GeospatialGraph.from_point(point, dist, network_type="drive", crs='epsg:27700') - - # set the parameters - self.infection_radius = infection_radius - self.recovery_time = recovery_time - self.marginal_mortality = 1.0 - (1.0 - mortality) ** (1.0 / recovery_time) - - # 
create the agent data, which is stored in a geopandas geodataframe - start_positions = self.domain.all_nodes.sample(n=n_agents, random_state=self.nprand, replace=True).index.values - speeds = self.nprand.lognormal(np.log(speed), 0.2, n_agents) - agents = gpd.GeoDataFrame(data={"node": start_positions, "speed": speeds, "status": Status.SUSCEPTIBLE, "t_infect": no.time.NEVER}) - agents["dest"] = agents["node"].apply(self.__random_next_dest) - agents["path"] = agents[["node", "dest"]].apply(lambda r: self.domain.shortest_path(r["node"], r["dest"], weight="length"), axis=1) - agents["dist"] = agents.path.apply(lambda p: p.length) - agents["offset"] = 0.0 - agents["geometry"] = agents["path"].apply(lambda linestr: line_interpolate_point(linestr, 0)) - infected = self.nprand.choice(agents.index, n_infected, replace=False) - agents.loc[infected, "status"] = Status.INFECTED - agents.loc[infected, "t_infect"] = self.timeline.index - - self.agents = agents - self.fig, self.g = self.__init_visualisation() - - def step(self) -> None: - self.__update_position() - self.__infect_nearby() - self.__recover() - self.__succumb() - num_infected = (self.agents.status == Status.INFECTED).sum() - num_immune = (self.agents.status == Status.IMMUNE).sum() - num_dead = (self.agents.status == Status.DEAD).sum() - self.__update_visualisation(num_infected, num_immune, num_dead) - if num_infected == 0: - sleep(5) - self.halt() - self.finalise() - - def finalise(self) -> None: - no.log(f"total steps: {self.timeline.index}") - no.log(f"infections: {len(self.agents.t_infect.dropna())}") - no.log(f"recoveries: {(self.agents.status == Status.IMMUNE).sum()}") - no.log(f"deaths: {(self.agents.status == Status.DEAD).sum()}") - no.log(f"unaffected: {(self.agents.status == Status.SUSCEPTIBLE).sum()}") - - def __random_next_dest(self, node: int) -> int: - # ensure dest is different from origin - dest = node - while dest == node: - dest = self.domain.all_nodes.sample(n=1, random_state=self.nprand).index.values[0] - return dest - - def __update_position(self) -> None: - self.agents.offset += self.agents.speed - # move agent along its route - self.agents["geometry"] = self.agents[["path", "offset"]].apply(lambda r: line_interpolate_point(r["path"], r["offset"]), axis=1) - # check if arrived at destination and set a new destination if necessary - overshoots = self.agents.offset >= self.agents.dist - if not overshoots.empty: - # offset <- offset - dist - self.agents.loc[overshoots, "offset"] -= self.agents.loc[overshoots, "dist"] - # node <- dest - self.agents.loc[overshoots, "node"] = self.agents.loc[overshoots, "dest"] - # dest <- random - self.agents.loc[overshoots, "dest"] = self.agents.loc[overshoots, "node"].apply(self.__random_next_dest) - # path <- (node, dest), dist <- new_dist - self.agents.loc[overshoots, "path"] = self.agents.loc[overshoots, ["node", "dest"]] \ - .apply(lambda r: self.domain.shortest_path(r["node"], r["dest"], weight="length"), axis=1) - self.agents.loc[overshoots, "dist"] = self.agents.loc[overshoots, "path"].apply(lambda p: p.length) - # finally update position - self.agents.loc[overshoots, "geometry"] = self.agents.loc[overshoots, "path"].apply(lambda linestr: line_interpolate_point(linestr, 0)) - - def __infect_nearby(self) -> None: - infected = self.agents[self.agents.status == Status.INFECTED].geometry - susceptible = self.agents[self.agents.status == Status.SUSCEPTIBLE].geometry - new_infections = [] - # loop over smallest group for efficiency - if len(infected) < len(susceptible): - for i in 
infected: - new = susceptible.geometry.distance(i) < self.infection_radius - # new[new].index gives us only the index values corresponding to True - new_infections.extend(new[new].index) - else: - for i, p in susceptible.items(): - new = infected.geometry.distance(p) < self.infection_radius - if new.any(): - new_infections.append(i) - self.agents.loc[new_infections, "status"] = Status.INFECTED - self.agents.loc[new_infections, "t_infect"] = self.timeline.index - - def __recover(self) -> None: - t = self.timeline.index - self.agents.loc[(t - self.agents.t_infect >= self.recovery_time) & (self.agents.status == Status.INFECTED), "status"] = Status.IMMUNE - - def __succumb(self) -> None: - infected = self.agents[self.agents.status == Status.INFECTED] - death = self.mc.hazard(self.marginal_mortality, len(infected)).astype(bool) - self.agents.loc[infected[death].index.values, "status"] = Status.DEAD - self.agents.loc[infected[death].index.values, "speed"] = 0.0 - - def __init_visualisation(self) -> tuple[Any, Any]: - plt.ion() - fig, ax = ox.plot_graph(self.domain.graph, bgcolor="w", node_size=5, edge_linewidth=2, edge_color="#777777", figsize=(12,9)) - plt.tight_layout() - # optionally add a basemap: - # import contextily as ctx - # ctx.add_basemap(ax, crs=self.domain.crs, url=ctx.providers.OpenTopoMap) - g = ax.scatter(self.agents.geometry.x, self.agents.geometry.y, color=self.agents.status.apply(lambda c: c.rgba), edgecolor='k') - fig.suptitle("[q to quit]") - fig.canvas.mpl_connect('key_press_event', lambda event: self.halt() if event.key == "q" else None) - fig.canvas.flush_events() - return fig, g - - def __update_visualisation(self, num_infected, num_immune, num_dead) -> None: - offsets = np.array(list(zip(self.agents.geometry.x, self.agents.geometry.y))) - colours = self.agents.status.apply(lambda c: c.rgba) - self.g.set_offsets(offsets) - self.g.set_facecolors(colours) - self.fig.suptitle(f"step {self.timeline.index}: inf={num_infected} imm={num_immune} dead={num_dead} / {len(self.agents)} [q to quit]") - - self.fig.canvas.flush_events() - + def __init__( + self, + point: tuple[float, float], + dist: float, + n_agents: int, + n_infected: int, + speed: float, + infection_radius: float, + recovery_time: int, + mortality: float, + ) -> None: + super().__init__( + no.LinearTimeline(0.0, 1.0), no.MonteCarlo.deterministic_independent_stream + ) + # expose the model's MC engine to numpy + self.nprand = no.as_np(self.mc) + # create the spatial domain + self.domain = GeospatialGraph.from_point( + point, dist, network_type="drive", crs="epsg:27700" + ) + + # set the parameters + self.infection_radius = infection_radius + self.recovery_time = recovery_time + self.marginal_mortality = 1.0 - (1.0 - mortality) ** (1.0 / recovery_time) + + # create the agent data, which is stored in a geopandas geodataframe + start_positions = self.domain.all_nodes.sample( + n=n_agents, random_state=self.nprand, replace=True + ).index.values + speeds = self.nprand.lognormal(np.log(speed), 0.2, n_agents) + agents = pd.DataFrame( + data={ + "node": start_positions, + "speed": speeds, + "status": Status.SUSCEPTIBLE, + "t_infect": no.time.NEVER, + } + ) + agents["dest"] = agents["node"].apply(self.__random_next_dest) + agents["path"] = agents[["node", "dest"]].apply( + lambda r: self.domain.shortest_path(r["node"], r["dest"], weight="length"), + axis=1, + ) + agents["dist"] = agents.path.apply(lambda p: p.length).astype(float) + agents["offset"] = 0.0 + infected = self.nprand.choice(agents.index, n_infected, 
replace=False) + agents.loc[infected, "status"] = Status.INFECTED + agents.loc[infected, "t_infect"] = self.timeline.index + + self.agents = gpd.GeoDataFrame( + agents, + geometry=agents["path"].apply( + lambda linestr: line_interpolate_point(linestr, 0) + ), + ) + self.fig, self.g = self.__init_visualisation() + + def step(self) -> None: + self.__update_position() + self.__infect_nearby() + self.__recover() + self.__succumb() + num_infected = (self.agents.status == Status.INFECTED).sum() + num_immune = (self.agents.status == Status.IMMUNE).sum() + num_dead = (self.agents.status == Status.DEAD).sum() + self.__update_visualisation(num_infected, num_immune, num_dead) + if num_infected == 0: + sleep(5) + self.halt() + self.finalise() + + def finalise(self) -> None: + no.log(f"total steps: {self.timeline.index}") + no.log(f"infections: {len(self.agents.t_infect.dropna())}") + no.log(f"recoveries: {(self.agents.status == Status.IMMUNE).sum()}") + no.log(f"deaths: {(self.agents.status == Status.DEAD).sum()}") + no.log(f"unaffected: {(self.agents.status == Status.SUSCEPTIBLE).sum()}") + + def __random_next_dest(self, node: int) -> int: + # ensure dest is different from origin + dest = node + while dest == node: + dest = self.domain.all_nodes.sample( + n=1, random_state=self.nprand + ).index.values[0] + return dest + + def __update_position(self) -> None: + self.agents.offset += self.agents.speed + # move agent along its route + self.agents["geometry"] = self.agents[["path", "offset"]].apply( + lambda r: line_interpolate_point(r["path"], r["offset"]), axis=1 + ) + # check if arrived at destination and set a new destination if necessary + overshoots = self.agents.offset >= self.agents.dist + if not overshoots.empty: + # offset <- offset - dist + self.agents.loc[overshoots, "offset"] -= self.agents.loc[overshoots, "dist"] + # node <- dest + self.agents.loc[overshoots, "node"] = self.agents.loc[overshoots, "dest"] + # dest <- random + self.agents.loc[overshoots, "dest"] = self.agents.loc[ + overshoots, "node" + ].apply(self.__random_next_dest) + # path <- (node, dest), dist <- new_dist + self.agents.loc[overshoots, "path"] = self.agents.loc[ + overshoots, ["node", "dest"] + ].apply( + lambda r: self.domain.shortest_path( + r["node"], r["dest"], weight="length" + ), + axis=1, + ) + self.agents.loc[overshoots, "dist"] = ( + self.agents.loc[overshoots, "path"] + .apply(lambda p: p.length) + .astype(float) + ) + # finally update position + self.agents.loc[overshoots, "geometry"] = self.agents.loc[ + overshoots, "path" + ].apply(lambda linestr: line_interpolate_point(linestr, 0)) + + def __infect_nearby(self) -> None: + infected = self.agents[self.agents.status == Status.INFECTED].geometry + susceptible = self.agents[self.agents.status == Status.SUSCEPTIBLE].geometry + new_infections = [] + # loop over smallest group for efficiency + if len(infected) < len(susceptible): + for i in infected: + new = susceptible.geometry.distance(i) < self.infection_radius + # new[new].index gives us only the index values corresponding to True + new_infections.extend(new[new].index) + else: + for i, p in susceptible.items(): + new = infected.geometry.distance(p) < self.infection_radius + if new.any(): + new_infections.append(i) + self.agents.loc[new_infections, "status"] = Status.INFECTED + self.agents.loc[new_infections, "t_infect"] = self.timeline.index + + def __recover(self) -> None: + t = self.timeline.index + self.agents.loc[ + (t - self.agents.t_infect >= self.recovery_time) + & (self.agents.status == 
Status.INFECTED), + "status", + ] = Status.IMMUNE + + def __succumb(self) -> None: + infected = self.agents[self.agents.status == Status.INFECTED] + death = self.mc.hazard(self.marginal_mortality, len(infected)).astype(bool) + self.agents.loc[infected[death].index.values, "status"] = Status.DEAD + self.agents.loc[infected[death].index.values, "speed"] = 0.0 + + def __init_visualisation(self) -> tuple[Any, Any]: + plt.ion() + fig, ax = ox.plot_graph( + self.domain.graph, + bgcolor="w", + node_size=5, + edge_linewidth=2, + edge_color="#777777", + figsize=(12, 9), + ) + plt.tight_layout() + # optionally add a basemap: + # import contextily as ctx + # ctx.add_basemap(ax, crs=self.domain.crs, url=ctx.providers.OpenTopoMap) + g = ax.scatter( + self.agents.geometry.x, + self.agents.geometry.y, + color=self.agents.status.apply(lambda c: c.rgba), + edgecolor="k", + ) + fig.suptitle("[q to quit]") + fig.canvas.mpl_connect( + "key_press_event", lambda event: self.halt() if event.key == "q" else None + ) + fig.canvas.flush_events() + return fig, g + + def __update_visualisation(self, num_infected, num_immune, num_dead) -> None: + offsets = np.array(list(zip(self.agents.geometry.x, self.agents.geometry.y))) + colours = self.agents.status.apply(lambda c: c.rgba) + self.g.set_offsets(offsets) + self.g.set_facecolors(colours) + self.fig.suptitle( + f"step {self.timeline.index}: inf={num_infected} imm={num_immune} dead={num_dead} / {len(self.agents)} [q to quit]" + ) + + self.fig.canvas.flush_events() diff --git a/examples/infection/run.py b/examples/infection/run.py index ace28989..64fb3ed2 100644 --- a/examples/infection/run.py +++ b/examples/infection/run.py @@ -1,23 +1,23 @@ -import neworder as no - from infection import Infection +import neworder as no + # centroid (latlon) -point = (53.925, -1.822) +POINT = (53.925, -1.822) # area size (m) -dist = 2000 +RADIUS = 2000 # number of agents -N = 1000 +NUM_AGENTS = 1000 # initial number of infected agents -I = 1 +NUM_INFECTED = 1 # mean speed of movement -speed = 10 +SPEED = 10 # max distance an infection can occur -infection_radius = 1 +INFECTION_RADIUS = 1 # number of steps infected before immunity -recovery_time = 100 +RECOVERY_TIME = 100 # probability of dying from infection at any point during the infection -mortality = 0.01 +MORTALITY = 0.01 -m = Infection(point, dist, N, I, speed, infection_radius, recovery_time, mortality) +m = Infection(POINT, RADIUS, NUM_AGENTS, NUM_INFECTED, SPEED, INFECTION_RADIUS, RECOVERY_TIME, MORTALITY) no.run(m) diff --git a/examples/markov_chain/markov_chain.py b/examples/markov_chain/markov_chain.py index a80e8f5e..7bf91175 100644 --- a/examples/markov_chain/markov_chain.py +++ b/examples/markov_chain/markov_chain.py @@ -1,57 +1,71 @@ -import pandas as pd # type: ignore import numpy as np +import pandas as pd # type: ignore + import neworder as no class MarkovChain(no.Model): - def __init__(self, timeline: no.Timeline, npeople: int, states: np.ndarray, transition_matrix: np.ndarray) -> None: - - super().__init__(timeline, no.MonteCarlo.deterministic_identical_stream) - self.npeople = npeople - - self.pop = pd.DataFrame(data={"state": np.full(npeople, 0), - "t1": no.time.NEVER, - "t2": no.time.NEVER}) - - self.states = states - self.transition_matrix = transition_matrix - self.summary = pd.DataFrame(columns=states) - self.summary.loc[0] = self.pop.state.value_counts().transpose() + def __init__( + self, + timeline: no.Timeline, + npeople: int, + states: np.ndarray, + transition_matrix: np.ndarray, + ) -> None: + 
super().__init__(timeline, no.MonteCarlo.deterministic_identical_stream) + self.npeople = npeople - # pure python equivalent implementation of no.df.transition, to illustrate the performance gain - def transition_py(self, colname: str) -> None: - def _interp(cumprob: np.ndarray, x: float) -> int: - lbound = 0 - while lbound < len(cumprob) - 1: - if cumprob[lbound] > x: - break - lbound += 1 - return lbound + self.pop = pd.DataFrame( + data={ + "state": np.full(npeople, 0), + "t1": no.time.NEVER, + "t2": no.time.NEVER, + } + ) - def _sample(u: float, tc: np.ndarray, c: np.ndarray) -> float: - return c[_interp(tc, u)] + self.states = states + self.transition_matrix = transition_matrix + self.summary = pd.DataFrame(columns=states) + self.summary.loc[0] = self.pop.state.value_counts().transpose() - # u = m.mc.ustream(len(df)) - tc = np.cumsum(self.transition_matrix, axis=1) + # pure python equivalent implementation of no.df.transition, to illustrate the performance gain + def transition_py(self, colname: str) -> None: + def _interp(cumprob: np.ndarray, x: float) -> int: + lbound = 0 + while lbound < len(cumprob) - 1: + if cumprob[lbound] > x: + break + lbound += 1 + return lbound - # reverse mapping of category label to index - lookup = {self.states[i]: i for i in range(len(self.states))} + def _sample(u: float, tc: np.ndarray, c: np.ndarray) -> float: + return c[_interp(tc, u)] - # for i in range(len(df)): - # current = df.loc[i, colname] - # df.loc[i, colname] = sample(u[i], tc[lookup[current]], c) - # this is a much faster equivalent of the loop in the commented code immediately above - self.pop[colname] = self.pop[colname].apply(lambda current: _sample(self.mc.ustream(1), tc[lookup[current]], self.states)) + # u = m.mc.ustream(len(df)) + tc = np.cumsum(self.transition_matrix, axis=1) - def step(self) -> None: - # self.transition_py("state") - # comment the above line and uncomment this line to use the faster C++ implementation - no.df.transition(self, self.states, self.transition_matrix, self.pop, "state") - self.summary.loc[len(self.summary)] = self.pop.state.value_counts().transpose() + # reverse mapping of category label to index + lookup = {self.states[i]: i for i in range(len(self.states))} - def finalise(self) -> None: - self.summary["t"] = np.linspace(self.timeline.start, self.timeline.end, self.timeline.nsteps + 1) - self.summary.reset_index(drop=True, inplace=True) - self.summary.fillna(0, inplace=True) + # for i in range(len(df)): + # current = df.loc[i, colname] + # df.loc[i, colname] = sample(u[i], tc[lookup[current]], c) + # this is a much faster equivalent of the loop in the commented code immediately above + self.pop[colname] = self.pop[colname].apply( + lambda current: _sample( + self.mc.ustream(1), tc[lookup[current]], self.states + ) + ) + def step(self) -> None: + # self.transition_py("state") + # comment the above line and uncomment this line to use the faster C++ implementation + no.df.transition(self, self.states, self.transition_matrix, self.pop, "state") + self.summary.loc[len(self.summary)] = self.pop.state.value_counts().transpose() + def finalise(self) -> None: + self.summary["t"] = np.linspace( + self.timeline.start, self.timeline.end, self.timeline.nsteps + 1 + ) + self.summary.reset_index(drop=True, inplace=True) + self.summary.fillna(0, inplace=True) diff --git a/examples/markov_chain/model.py b/examples/markov_chain/model.py index 8390e71b..2c6eb1a8 100644 --- a/examples/markov_chain/model.py +++ b/examples/markov_chain/model.py @@ -1,9 +1,10 @@ - import 
time + import numpy as np -import neworder as no -from markov_chain import MarkovChain import visualisation +from markov_chain import MarkovChain + +import neworder as no # Logging and checking options # no.verbose() @@ -29,11 +30,13 @@ # 0 -> 1 # \ \ # <-> 2 -transition_matrix = np.array([ - [1.0 - lambda_01 * dt - lambda_02 * dt, lambda_01 * dt, lambda_02 * dt ], - [0.0, 1.0 - lambda_12 * dt, lambda_12 * dt ], - [lambda_20 * dt, 0.0, 1.0 - lambda_20 * dt] -]) +transition_matrix = np.array( + [ + [1.0 - lambda_01 * dt - lambda_02 * dt, lambda_01 * dt, lambda_02 * dt], + [0.0, 1.0 - lambda_12 * dt, lambda_12 * dt], + [lambda_20 * dt, 0.0, 1.0 - lambda_20 * dt], + ] +) timeline = no.LinearTimeline(0, tmax, tmax) diff --git a/examples/markov_chain/visualisation.py b/examples/markov_chain/visualisation.py index 9e23e276..d34c8a92 100644 --- a/examples/markov_chain/visualisation.py +++ b/examples/markov_chain/visualisation.py @@ -1,18 +1,23 @@ - -from matplotlib import pyplot as plt # type: ignore from markov_chain import MarkovChain +from matplotlib import pyplot as plt # type: ignore + def show(model: MarkovChain) -> None: - # this seems to have a bug - # model.summary.plot(kind='bar', width=1.0, stacked=True) - dt = model.timeline.dt - plt.bar(model.summary.t, model.summary[0], width=dt) # , stacked=True) - plt.bar(model.summary.t, model.summary[1], width=dt, bottom=model.summary[0]) - plt.bar(model.summary.t, model.summary[2], width=dt, bottom=model.summary[0] + model.summary[1]) - plt.legend(["State 0", "State 1", "State 2"]) - plt.title("State occupancy") - plt.ylabel("Count") - plt.xlabel("Time") + # this seems to have a bug + # model.summary.plot(kind='bar', width=1.0, stacked=True) + dt = model.timeline.dt + plt.bar(model.summary.t, model.summary[0], width=dt) # , stacked=True) + plt.bar(model.summary.t, model.summary[1], width=dt, bottom=model.summary[0]) + plt.bar( + model.summary.t, + model.summary[2], + width=dt, + bottom=model.summary[0] + model.summary[1], + ) + plt.legend(["State 0", "State 1", "State 2"]) + plt.title("State occupancy") + plt.ylabel("Count") + plt.xlabel("Time") - # plt.savefig("docs/examples/img/markov_chain.png") - plt.show() + # plt.savefig("docs/examples/img/markov_chain.png") + plt.show() diff --git a/examples/mortality/model.py b/examples/mortality/model.py index c4667152..8647de8c 100644 --- a/examples/mortality/model.py +++ b/examples/mortality/model.py @@ -1,11 +1,13 @@ import time -import neworder # model implementations -from people import PeopleDiscrete, PeopleContinuous +from people import PeopleContinuous, PeopleDiscrete + # visualisation code from plot import plot +import neworder + # neworder.verbose() # checks disabled to emphasise performance differences neworder.checked(False) @@ -23,14 +25,20 @@ start = time.perf_counter() neworder.run(mortality_discrete) end = time.perf_counter() -neworder.log("Discrete model life expectancy = %f, exec time = %f" % (mortality_discrete.life_expectancy, end - start)) +neworder.log( + "Discrete model life expectancy = %f, exec time = %f" + % (mortality_discrete.life_expectancy, end - start) +) # run the continuous model mortality_continuous = PeopleContinuous(mortality_hazard_file, population_size, 1.0) start = time.perf_counter() neworder.run(mortality_continuous) end = time.perf_counter() -neworder.log("Continuous model life expectancy = %f, exec time = %f" % (mortality_continuous.life_expectancy, end - start)) +neworder.log( + "Continuous model life expectancy = %f, exec time = %f" + % 
(mortality_continuous.life_expectancy, end - start) +) # visualise some results # hist_file = "docs/examples/img/mortality_%dk.png" % (population_size//1000) diff --git a/examples/mortality/people.py b/examples/mortality/people.py index 5720044a..9728f016 100644 --- a/examples/mortality/people.py +++ b/examples/mortality/people.py @@ -1,106 +1,130 @@ - import numpy as np import pandas as pd # type: ignore + import neworder # !disc_ctor! class PeopleDiscrete(neworder.Model): - """ Persons sampled each represented as a row in a data frame """ - def __init__(self, mortality_hazard_file: str, n: int, max_age: float) -> None: - # This is case-based model the timeline refers to the age of the cohort - timeline = neworder.LinearTimeline(0.0, max_age, int(max_age)) - super().__init__(timeline, neworder.MonteCarlo.deterministic_identical_stream) + """Persons sampled each represented as a row in a data frame""" + + def __init__(self, mortality_hazard_file: str, n: int, max_age: float) -> None: + # This is case-based model the timeline refers to the age of the cohort + timeline = neworder.LinearTimeline(0.0, max_age, int(max_age)) + super().__init__(timeline, neworder.MonteCarlo.deterministic_identical_stream) + + # initialise cohort + self.mortality_hazard = pd.read_csv(mortality_hazard_file) + + # store the largest age we have a rate for + self.max_rate_age = max(self.mortality_hazard.DC1117EW_C_AGE) - 1 + + # neworder.log(self.mortality_hazard.head()) + self.population = pd.DataFrame( + index=neworder.df.unique_index(n), + data={"alive": True, "age": 0.0, "age_at_death": neworder.time.FAR_FUTURE}, + ) + + self.max_age = max_age + + # !disc_ctor! + + # !disc_step! + def step(self) -> None: + # kill off some people + self.die() + # age the living only + alive = self.population.loc[self.population.alive].index + self.population.loc[alive, "age"] = ( + self.population.loc[alive, "age"] + self.timeline.dt + ) + + # !disc_step! + + def check(self) -> bool: + neworder.log("pct alive = %f" % (100.0 * np.mean(self.population.alive))) + return True + + # !disc_finalise! + def finalise(self) -> None: + # kill off any survivors + self.die() + assert np.sum(self.population.alive) == 0 + # the calc life expectancy + self.life_expectancy = np.mean(self.population.age_at_death) + + # !disc_finalise! + + def die(self) -> None: + # using indexes to subset data as cannot store a reference to a subset of the dataframe (it just copies) + + # first filter out the already dead + alive = self.population.loc[self.population.alive].index + # sample time of death + r = self.mc.stopping( + self.mortality_hazard.Rate.values[ + min(self.timeline.index, self.max_rate_age) + ], + len(alive), + ) + # select if death happens before next timestep... + dt = self.timeline.dt + # at final timestep everybody dies (at some later time) so dt is infinite + if self.timeline.time == self.max_age: + dt = neworder.time.FAR_FUTURE + newly_dead = alive[r < dt] + + # kill off those who die before next timestep + self.population.loc[newly_dead, "alive"] = False + # and set the age at death according to the stopping time above + self.population.loc[newly_dead, "age_at_death"] = ( + self.population.loc[newly_dead, "age"] + r[r < dt] + ) - # initialise cohort - self.mortality_hazard = pd.read_csv(mortality_hazard_file) - # store the largest age we have a rate for - self.max_rate_age = max(self.mortality_hazard.DC1117EW_C_AGE) - 1 +# !cont_ctor! 
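PeopleContinuous below replaces the discrete timestep loop with a single direct draw of each age at death via mc.first_arrival. For readers unfamiliar with direct sampling from a piecewise-constant hazard, a rough inverse-transform sketch follows - the names and details are illustrative, not neworder's implementation:

    import numpy as np

    def first_arrival_sketch(
        rates: np.ndarray, dt: float, n: int, seed: int = 0
    ) -> np.ndarray:
        rng = np.random.default_rng(seed)
        # cumulative integrated hazard at the interval boundaries
        H = np.concatenate(([0.0], np.cumsum(rates) * dt))
        # invert P(T > t) = exp(-H(t)): draw E ~ Exp(1) and find the
        # interval in which the cumulative hazard first exceeds it
        e = rng.exponential(size=n)
        idx = np.searchsorted(H, e) - 1
        t = np.full(n, np.inf)  # hazard exhausted: no arrival
        hit = idx < len(rates)
        # the hazard is constant within an interval, so invert linearly
        t[hit] = idx[hit] * dt + (e[hit] - H[idx[hit]]) / rates[idx[hit]]
        return t
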
+class PeopleContinuous(neworder.Model): + """Persons sampled each represented as a row in a data frame""" - # neworder.log(self.mortality_hazard.head()) - self.population = pd.DataFrame(index=neworder.df.unique_index(n), - data={"alive": True, - "age": 0.0, - "age_at_death": neworder.time.FAR_FUTURE}) + def __init__(self, mortality_hazard_file: str, n: int, dt: float) -> None: + # Direct sampling doesnt require a timeline + super().__init__( + neworder.NoTimeline(), neworder.MonteCarlo.deterministic_identical_stream + ) + # initialise cohort + self.mortality_hazard = pd.read_csv(mortality_hazard_file) - self.max_age = max_age -# !disc_ctor! + # store the largest age we have a rate for + self.max_rate_age = max(self.mortality_hazard.DC1117EW_C_AGE) - 1 - # !disc_step! - def step(self) -> None: - # kill off some people - self.die() - # age the living only - alive = self.population.loc[self.population.alive].index - self.population.loc[alive, "age"] = self.population.loc[alive, "age"] + self.timeline.dt - # !disc_step! - - def check(self) -> bool: - neworder.log("pct alive = %f" % (100.0 * np.mean(self.population.alive))) - return True - - # !disc_finalise! - def finalise(self) -> None: - # kill off any survivors - self.die() - assert np.sum(self.population.alive) == 0 - # the calc life expectancy - self.life_expectancy = np.mean(self.population.age_at_death) - # !disc_finalise! - - def die(self) -> None: - # using indexes to subset data as cannot store a reference to a subset of the dataframe (it just copies) - - # first filter out the already dead - alive = self.population.loc[self.population.alive].index - # sample time of death - r = self.mc.stopping(self.mortality_hazard.Rate.values[min(self.timeline.index, self.max_rate_age)], len(alive)) - # select if death happens before next timestep... - dt = self.timeline.dt - # at final timestep everybody dies (at some later time) so dt is infinite - if self.timeline.time == self.max_age: - dt = neworder.time.FAR_FUTURE - newly_dead = alive[r < dt] - - # kill off those who die before next timestep - self.population.loc[newly_dead, "alive"] = False - # and set the age at death according to the stopping time above - self.population.loc[newly_dead, "age_at_death"] = self.population.loc[newly_dead, "age"] + r[r < dt] + # neworder.log(self.mortality_hazard.head()) + self.population = pd.DataFrame( + index=neworder.df.unique_index(n), + data={"age_at_death": neworder.time.FAR_FUTURE}, + ) + # the time interval of the mortality data values + self.dt = dt -# !cont_ctor! -class PeopleContinuous(neworder.Model): - """ Persons sampled each represented as a row in a data frame """ - def __init__(self, mortality_hazard_file: str, n: int, dt: float) -> None: - # Direct sampling doesnt require a timeline - super().__init__(neworder.NoTimeline(), neworder.MonteCarlo.deterministic_identical_stream) - # initialise cohort - self.mortality_hazard = pd.read_csv(mortality_hazard_file) - - # store the largest age we have a rate for - self.max_rate_age = max(self.mortality_hazard.DC1117EW_C_AGE) - 1 - - # neworder.log(self.mortality_hazard.head()) - self.population = pd.DataFrame(index=neworder.df.unique_index(n), - data={"age_at_death": neworder.time.FAR_FUTURE}) - - # the time interval of the mortality data values - self.dt = dt -# !cont_ctor! + # !cont_ctor! + + # !cont_step! + def step(self) -> None: + self.population.age_at_death = self.mc.first_arrival( + self.mortality_hazard.Rate.values, self.dt, len(self.population) + ) + + # !cont_step! + + # !cont_check! 
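# A sketch of what the first_arrival call in step() above samples: the first
# arrival time of a process with piecewise-constant rates (rates[i] applying
# on [i*dt, (i+1)*dt)), obtained by inverting the cumulative hazard at an
# exponential variate. Conceptual only, not neworder's implementation.
import numpy as np

def first_arrival_sketch(rates: np.ndarray, dt: float, rng: np.random.Generator) -> float:
    target = rng.exponential()  # cumulative hazard accrued at the arrival
    cumhaz = 0.0
    for i, r in enumerate(rates):
        if r > 0 and cumhaz + r * dt >= target:
            return i * dt + (target - cumhaz) / r  # arrival within this interval
        cumhaz += r * dt
    return np.inf  # no arrival before the rate data runs out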
+ def check(self) -> bool: + # ensure all times of death are finite + return self.population.age_at_death.isnull().sum() == 0 + + # !cont_check! + + # !cont_finalise! + def finalise(self) -> None: + self.life_expectancy = np.mean(self.population.age_at_death) - # !cont_step! - def step(self) -> None: - self.population.age_at_death = self.mc.first_arrival(self.mortality_hazard.Rate.values, self.dt, len(self.population)) - # !cont_step! - - # !cont_check! - def check(self) -> bool: - # ensure all times of death are finite - return self.population.age_at_death.isnull().sum() == 0 - # !cont_check! - - # !cont_finalise! - def finalise(self) -> None: - self.life_expectancy = np.mean(self.population.age_at_death) - # !cont_finalise! + # !cont_finalise! diff --git a/examples/mortality/plot.py b/examples/mortality/plot.py index 0a1d6b57..2c39b0af 100644 --- a/examples/mortality/plot.py +++ b/examples/mortality/plot.py @@ -1,54 +1,65 @@ from typing import Optional, Union + +import matplotlib.animation as animation # type: ignore +import matplotlib.pyplot as plt # type: ignore import numpy as np import pandas as pd # type: ignore -import matplotlib.pyplot as plt # type: ignore -import matplotlib.animation as animation # type: ignore class Hist: - def __init__(self, data: pd.DataFrame, numbins: int) -> None: - - fig, ax = plt.subplots() - # see https://matplotlib.org/api/_as_gen/matplotlib.pyplot.hist.html - self.n, _bins, self.patches = plt.hist(data, numbins, facecolor='black') - - ax.set_title("Discrete case-based mortality model (%d people)" % len(data)) - ax.set_xlabel("Age at Death") - ax.set_ylabel("Persons") - - self.anim = animation.FuncAnimation(fig, self.__animate, interval=100, frames=numbins, repeat=False) - - def save(self, filename: str) -> None: - # there seems to be no way of preventing passing the loop once setting to the saved gif and it loops forever, which is very annoying - self.anim.save(filename, dpi=80, writer=animation.ImageMagickWriter(extra_args=["-loop", "1"])) - - def show(self) -> None: - plt.show() - - def __animate(self, frameno: int) -> Union[list, list[list]]: - i = 0 - for rect, h in zip(self.patches, self.n): - rect.set_height(h if i <= frameno else 0) - i = i + 1 - return self.patches - - -def plot(pop_disc: pd.DataFrame, pop_cont: pd.DataFrame, filename: Optional[str]=None, anim_filename: Optional[str]=None) -> None: - bins = int(max(pop_disc.age_at_death.max(), pop_cont.age_at_death.max())) + 1 - rng = (0.0, float(bins)) - y1, x1 = np.histogram(pop_disc.age_at_death, bins, range=rng) - plt.plot(x1[1:], y1) - y2, x2 = np.histogram(pop_cont.age_at_death, bins, range=rng) - plt.plot(x2[1:], y2) - plt.title("Mortality model sampling algorithm comparison") - plt.legend(["Discrete", "Continuous"]) - plt.xlabel("Age at Death") - plt.ylabel("Persons") - - if filename is not None: - plt.savefig(filename, dpi=80) - - h = Hist(pop_disc.age_at_death, int(max(pop_disc.age))) - if anim_filename is not None: - h.save(anim_filename) - h.show() + def __init__(self, data: pd.DataFrame, numbins: int) -> None: + fig, ax = plt.subplots() + # see https://matplotlib.org/api/_as_gen/matplotlib.pyplot.hist.html + self.n, _bins, self.patches = plt.hist(data, numbins, facecolor="black") + + ax.set_title("Discrete case-based mortality model (%d people)" % len(data)) + ax.set_xlabel("Age at Death") + ax.set_ylabel("Persons") + + self.anim = animation.FuncAnimation( + fig, self.__animate, interval=100, frames=numbins, repeat=False + ) + + def save(self, filename: str) -> None: + # there 
seems to be no way of preventing passing the loop once setting to the saved gif and it loops forever, which is very annoying + self.anim.save( + filename, + dpi=80, + writer=animation.ImageMagickWriter(extra_args=["-loop", "1"]), + ) + + def show(self) -> None: + plt.show() + + def __animate(self, frameno: int) -> Union[list, list[list]]: + i = 0 + for rect, h in zip(self.patches, self.n): + rect.set_height(h if i <= frameno else 0) + i = i + 1 + return self.patches + + +def plot( + pop_disc: pd.DataFrame, + pop_cont: pd.DataFrame, + filename: Optional[str] = None, + anim_filename: Optional[str] = None, +) -> None: + bins = int(max(pop_disc.age_at_death.max(), pop_cont.age_at_death.max())) + 1 + rng = (0.0, float(bins)) + y1, x1 = np.histogram(pop_disc.age_at_death, bins, range=rng) + plt.plot(x1[1:], y1) + y2, x2 = np.histogram(pop_cont.age_at_death, bins, range=rng) + plt.plot(x2[1:], y2) + plt.title("Mortality model sampling algorithm comparison") + plt.legend(["Discrete", "Continuous"]) + plt.xlabel("Age at Death") + plt.ylabel("Persons") + + if filename is not None: + plt.savefig(filename, dpi=80) + + h = Hist(pop_disc.age_at_death, int(max(pop_disc.age))) + if anim_filename is not None: + h.save(anim_filename) + h.show() diff --git a/examples/n-body/__init__.py b/examples/n-body/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/examples/option/black_scholes.py b/examples/option/black_scholes.py index 275b963a..3eb48ebe 100644 --- a/examples/option/black_scholes.py +++ b/examples/option/black_scholes.py @@ -1,121 +1,132 @@ """ Black-Scholes model implementations: analytic and MC """ from typing import Any + import numpy as np +from helpers import norm_cdf, nstream + import neworder -from helpers import nstream, norm_cdf + # Subclass neworder.Model class BlackScholes(neworder.Model): - # !constructor! - def __init__(self, option: dict[str, Any], market: dict[str, float], nsims: int) -> None: - - # Using exact MC calc of GBM requires only 1 timestep - timeline = neworder.LinearTimeline(0.0, option["expiry"], 1) - super().__init__(timeline, neworder.MonteCarlo.deterministic_identical_stream) - - self.option = option - self.market = market - self.nsims = nsims - # !constructor! - - # !modifier! - def modify(self) -> None: - if neworder.mpi.RANK == 1: - self.market["spot"] *= 1.01 # delta/gamma up bump - elif neworder.mpi.RANK == 2: - self.market["spot"] *= 0.99 # delta/gamma down bump - elif neworder.mpi.RANK == 3: - self.market["vol"] += 0.001 # 10bp upward vega - # !modifier! - - # !step! - def step(self) -> None: - self.pv = self.simulate() - # !step! - - # !check! - def check(self) -> bool: - # check the rng streams are still in sync by sampling from each one, - # comparing, and broadcasting the result. If one process fails the - # check and exits without notifying the others, deadlocks can result. - # send the state representation to process 0 (others will get None) - states = neworder.mpi.COMM.gather(self.mc.state(), 0) - # process 0 checks the values - if states: - ok = all(s == states[0] for s in states) - else: - ok = True - # broadcast process 0's ok to all processes - ok = neworder.mpi.COMM.bcast(ok, root=0) - return ok - # !check! - - # !finalise! - def finalise(self) -> None: - # check and report accuracy - self.compare() - # compute and report some market risk - self.greeks() - # !finalise! 
- - def simulate(self) -> float: - # get the single timestep from the timeline - dt = self.timeline.dt - normals = nstream(self.mc.ustream(self.nsims)) - - # compute underlying prices at t=dt - S = self.market["spot"] - r = self.market["rate"] - q = self.market["divy"] - sigma = self.market["vol"] - underlyings = S * np.exp((r - q - 0.5 * sigma * sigma) * dt + normals * sigma * np.sqrt(dt)) - # compute option prices at t=dt - if self.option["callput"] == "CALL": - fv = (underlyings - self.option["strike"]).clip(min=0.0).mean() - else: - fv = (self.option["strike"] - underlyings).clip(min=0.0).mean() - - # discount back to val date - return fv * np.exp(-r * dt) - - def analytic(self) -> float: - """ Compute Black-Scholes European option price """ - S = self.market["spot"] - K = self.option["strike"] - r = self.market["rate"] - q = self.market["divy"] - T = self.option["expiry"] - vol = self.market["vol"] - - # neworder.log("%f %f %f %f %f %f" % (S, K, r, q, T, vol)) - - srt = vol * np.sqrt(T) - rqs2t = (r - q + 0.5 * vol * vol) * T - d1 = (np.log(S / K) + rqs2t) / srt - d2 = d1 - srt - df = np.exp(-r * T) - qf = np.exp(-q * T) - - if self.option["callput"] == "CALL": - return S * qf * norm_cdf(d1) - K * df * norm_cdf(d2) - else: - return -S * df * norm_cdf(-d1) + K * df * norm_cdf(-d2) - - def compare(self) -> bool: - """ Compare MC price to analytic """ - ref = self.analytic() - err = self.pv / ref - 1.0 - neworder.log("mc: {:.6f} / ref: {:.6f} err={:.2%}".format(self.pv, ref, err)) - # relative error should be within O(1/(sqrt(sims))) of analytic solution - return True if abs(err) <= 2.0 / np.sqrt(self.nsims) else False - - def greeks(self) -> None: - # get all the results - pvs = neworder.mpi.COMM.gather(self.pv, 0) - # compute sensitivities on rank 0 - if pvs: - neworder.log(f"PV={pvs[0]:.3f}") - neworder.log(f"delta={(pvs[1] - pvs[2]) / 2:.3f}") - neworder.log(f"gamma={(pvs[1] - 2 * pvs[0] + pvs[2]):.3f}") - neworder.log(f"vega 10bp={pvs[3] - pvs[0]:.3f}") + # !constructor! + def __init__( + self, option: dict[str, Any], market: dict[str, float], nsims: int + ) -> None: + # Using exact MC calc of GBM requires only 1 timestep + timeline = neworder.LinearTimeline(0.0, option["expiry"], 1) + super().__init__(timeline, neworder.MonteCarlo.deterministic_identical_stream) + + self.option = option + self.market = market + self.nsims = nsims + + # !constructor! + + # !modifier! + def modify(self) -> None: + if neworder.mpi.RANK == 1: + self.market["spot"] *= 1.01 # delta/gamma up bump + elif neworder.mpi.RANK == 2: + self.market["spot"] *= 0.99 # delta/gamma down bump + elif neworder.mpi.RANK == 3: + self.market["vol"] += 0.001 # 10bp upward vega + + # !modifier! + + # !step! + def step(self) -> None: + self.pv = self.simulate() + + # !step! + + # !check! + def check(self) -> bool: + # check the rng streams are still in sync by sampling from each one, + # comparing, and broadcasting the result. If one process fails the + # check and exits without notifying the others, deadlocks can result. + # send the state representation to process 0 (others will get None) + states = neworder.mpi.COMM.gather(self.mc.state(), 0) + # process 0 checks the values + if states: + ok = all(s == states[0] for s in states) + else: + ok = True + # broadcast process 0's ok to all processes + ok = neworder.mpi.COMM.bcast(ok, root=0) + return ok + + # !check! + + # !finalise! 
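# The same gather/compare/broadcast pattern as check() above, reduced to raw
# mpi4py. A sketch: `value` stands in for any per-rank state (here the MC
# state hash) that must remain identical across ranks.
from mpi4py import MPI

def all_ranks_agree(comm: MPI.Comm, value) -> bool:
    values = comm.gather(value, root=0)  # rank 0 gets a list, others get None
    ok = all(v == values[0] for v in values) if values is not None else None
    # every rank must receive the verdict: a rank that failed and exited
    # alone would deadlock the collective calls that follow
    return comm.bcast(ok, root=0)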
+ def finalise(self) -> None: + # check and report accuracy + self.compare() + # compute and report some market risk + self.greeks() + + # !finalise! + + def simulate(self) -> float: + # get the single timestep from the timeline + dt = self.timeline.dt + normals = nstream(self.mc.ustream(self.nsims)) + + # compute underlying prices at t=dt + S = self.market["spot"] + r = self.market["rate"] + q = self.market["divy"] + sigma = self.market["vol"] + underlyings = S * np.exp( + (r - q - 0.5 * sigma * sigma) * dt + normals * sigma * np.sqrt(dt) + ) + # compute option prices at t=dt + if self.option["callput"] == "CALL": + fv = (underlyings - self.option["strike"]).clip(min=0.0).mean() + else: + fv = (self.option["strike"] - underlyings).clip(min=0.0).mean() + + # discount back to val date + return fv * np.exp(-r * dt) + + def analytic(self) -> float: + """Compute Black-Scholes European option price""" + S = self.market["spot"] + K = self.option["strike"] + r = self.market["rate"] + q = self.market["divy"] + T = self.option["expiry"] + vol = self.market["vol"] + + # neworder.log("%f %f %f %f %f %f" % (S, K, r, q, T, vol)) + + srt = vol * np.sqrt(T) + rqs2t = (r - q + 0.5 * vol * vol) * T + d1 = (np.log(S / K) + rqs2t) / srt + d2 = d1 - srt + df = np.exp(-r * T) + qf = np.exp(-q * T) + + if self.option["callput"] == "CALL": + return S * qf * norm_cdf(d1) - K * df * norm_cdf(d2) + else: + return -S * df * norm_cdf(-d1) + K * df * norm_cdf(-d2) + + def compare(self) -> bool: + """Compare MC price to analytic""" + ref = self.analytic() + err = self.pv / ref - 1.0 + neworder.log("mc: {:.6f} / ref: {:.6f} err={:.2%}".format(self.pv, ref, err)) + # relative error should be within O(1/(sqrt(sims))) of analytic solution + return True if abs(err) <= 2.0 / np.sqrt(self.nsims) else False + + def greeks(self) -> None: + # get all the results + pvs = neworder.mpi.COMM.gather(self.pv, 0) + # compute sensitivities on rank 0 + if pvs: + neworder.log(f"PV={pvs[0]:.3f}") + neworder.log(f"delta={(pvs[1] - pvs[2]) / 2:.3f}") + neworder.log(f"gamma={(pvs[1] - 2 * pvs[0] + pvs[2]):.3f}") + neworder.log(f"vega 10bp={pvs[3] - pvs[0]:.3f}") diff --git a/examples/option/helpers.py b/examples/option/helpers.py index f0e39672..97a9744a 100644 --- a/examples/option/helpers.py +++ b/examples/option/helpers.py @@ -1,20 +1,20 @@ from typing import TypeVar + import numpy as np -# for inverse cumulative normal -import scipy.stats # type: ignore import scipy.special # type: ignore +# for inverse cumulative normal +import scipy.stats # type: ignore -T = TypeVar('T') # Any type. +T = TypeVar("T") # Any type. 
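# A standalone sanity check for the analytic() pricer above, using the same
# erf-based normal CDF as this module. Put-call parity requires
# C - P = S*exp(-q*T) - K*exp(-r*T); note that the put branch above applies
# df = exp(-r*T) to the spot term where parity suggests qf = exp(-q*T), which
# may be worth verifying (the bundled example only prices a call). The
# numbers below are the example's own market/option data.
import numpy as np
import scipy.special

def _cdf(x):
    return (1.0 + scipy.special.erf(x / np.sqrt(2.0))) / 2.0

def bs(callput, S, K, r, q, T, vol):
    srt = vol * np.sqrt(T)
    d1 = (np.log(S / K) + (r - q + 0.5 * vol * vol) * T) / srt
    d2 = d1 - srt
    df, qf = np.exp(-r * T), np.exp(-q * T)
    if callput == "CALL":
        return S * qf * _cdf(d1) - K * df * _cdf(d2)
    return K * df * _cdf(-d2) - S * qf * _cdf(-d1)

S, K, r, q, T, vol = 100.0, 100.0, 0.02, 0.01, 0.75, 0.2
assert np.isclose(bs("CALL", S, K, r, q, T, vol) - bs("PUT", S, K, r, q, T, vol),
                  S * np.exp(-q * T) - K * np.exp(-r * T))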
nparray = np.ndarray[T, np.dtype[T]] def nstream(u: nparray[np.float64]) -> nparray[np.float64]: - """ Return a vector of n normally distributed pseudorandom variates (mean zero unit variance) """ - return scipy.stats.norm.ppf(u) + """Return a vector of n normally distributed pseudorandom variates (mean zero unit variance)""" + return scipy.stats.norm.ppf(u) def norm_cdf(x: nparray[np.float64]) -> nparray[np.float64]: - """ Compute the normal cumulatve density funtion """ - return (1.0 + scipy.special.erf(x / np.sqrt(2.0))) / 2.0 - + """Compute the normal cumulatve density funtion""" + return (1.0 + scipy.special.erf(x / np.sqrt(2.0))) / 2.0 diff --git a/examples/option/model.py b/examples/option/model.py index 9d90c6a6..a0cc8b15 100644 --- a/examples/option/model.py +++ b/examples/option/model.py @@ -5,9 +5,10 @@ can interact within the model, and how to synchronise the random streams in each process """ -import neworder from black_scholes import BlackScholes +import neworder + # neworder.verbose() # uncomment for verbose logging # neworder.checked(False) # uncomment to disable checks @@ -19,20 +20,20 @@ # market data market = { - "spot": 100.0, # underlying spot price - "rate": 0.02, # risk-free interest rate - "divy": 0.01, # (continuous) dividend yield - "vol": 0.2 # stock volatility + "spot": 100.0, # underlying spot price + "rate": 0.02, # risk-free interest rate + "divy": 0.01, # (continuous) dividend yield + "vol": 0.2, # stock volatility } # (European) option instrument data option = { - "callput": "CALL", - "strike": 100.0, - "expiry": 0.75 # years + "callput": "CALL", + "strike": 100.0, + "expiry": 0.75, # years } # model parameters -nsims = 1000000 # number of underlyings to simulate +nsims = 1000000 # number of underlyings to simulate # instantiate model bs_mc = BlackScholes(option, market, nsims) diff --git a/examples/parallel/model.py b/examples/parallel/model.py index d20b4315..1f3d3324 100644 --- a/examples/parallel/model.py +++ b/examples/parallel/model.py @@ -1,9 +1,10 @@ # !setup! +from parallel import Parallel # import our model definition + import neworder -from parallel import Parallel # import our model definition -#neworder.verbose() -#neworder.checked(False) +# neworder.verbose() +# neworder.checked(False) # must be MPI enabled assert neworder.mpi.SIZE > 1, "This configuration requires MPI with >1 process" diff --git a/examples/parallel/parallel.py b/examples/parallel/parallel.py index bc8e4972..ce10df4f 100644 --- a/examples/parallel/parallel.py +++ b/examples/parallel/parallel.py @@ -4,68 +4,81 @@ import neworder + class Parallel(neworder.Model): - def __init__(self, timeline: neworder.Timeline, p: float, n: int): - # initialise base model (essential!) - super().__init__(timeline, neworder.MonteCarlo.nondeterministic_stream) - - # enumerate possible states - self.s = np.arange(neworder.mpi.SIZE) - - # create transition matrix with all off-diagonal probabilities equal to p - self.p = np.identity(neworder.mpi.SIZE) * (1 - neworder.mpi.SIZE * p) + p - - # record initial population size - self.n = n - - # individuals get a unique id and their initial state is the MPI rank - self.pop = pd.DataFrame({"id": neworder.df.unique_index(n), - "state": np.full(n, neworder.mpi.RANK) }).set_index("id") -#!constructor! - - # !step! 
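# The transition matrix built in the constructor above, checked standalone:
# np.identity(size) * (1 - size * p) + p has off-diagonal entries p and
# diagonal 1 - (size - 1) * p, so each row sums to one as a probability
# distribution must. `size` and `p` here are illustrative values.
import numpy as np

size, p = 4, 0.01
P = np.identity(size) * (1 - size * p) + p
assert np.allclose(P.sum(axis=1), 1.0)
assert np.allclose(np.diag(P), 1 - (size - 1) * p)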
- def step(self) -> None: - # generate some movement - neworder.df.transition(self, self.s, self.p, self.pop, "state") - - # send emigrants to other processes - for s in range(neworder.mpi.SIZE): - if s != neworder.mpi.RANK: - emigrants = self.pop[self.pop.state == s] - neworder.log("sending %d emigrants to %d" % (len(emigrants), s)) - neworder.mpi.COMM.send(emigrants, dest=s) - - # remove the emigrants - self.pop = self.pop[self.pop.state == neworder.mpi.RANK] - - # receive immigrants - for s in range(neworder.mpi.SIZE): - if s != neworder.mpi.RANK: - immigrants = neworder.mpi.COMM.recv(source=s) - if len(immigrants): - neworder.log("received %d immigrants from %d" % (len(immigrants), s)) - self.pop = pd.concat((self.pop, immigrants)) - # !step! - - # !check! - def check(self) -> bool: - # Ensure we haven't lost (or gained) anybody - totals = neworder.mpi.COMM.gather(len(self.pop), root=0) - if totals: - if sum(totals) != self.n * neworder.mpi.SIZE: - return False - # And check each process only has individuals that it should have - out_of_place = neworder.mpi.COMM.gather(len(self.pop[self.pop.state != neworder.mpi.RANK])) - if out_of_place and any(out_of_place): - return False - return True - # !check! - - # !finalise! - def finalise(self) -> None: - # process 0 assembles all the data and prints a summary - pops = neworder.mpi.COMM.gather(self.pop, root=0) - if pops: - pop = pd.concat(pops) - neworder.log("State counts (total %d):\n%s" % (len(pop), pop["state"].value_counts().to_string())) - # !finalise! + def __init__(self, timeline: neworder.Timeline, p: float, n: int): + # initialise base model (essential!) + super().__init__(timeline, neworder.MonteCarlo.nondeterministic_stream) + + # enumerate possible states + self.s = np.arange(neworder.mpi.SIZE) + + # create transition matrix with all off-diagonal probabilities equal to p + self.p = np.identity(neworder.mpi.SIZE) * (1 - neworder.mpi.SIZE * p) + p + + # record initial population size + self.n = n + + # individuals get a unique id and their initial state is the MPI rank + self.pop = pd.DataFrame( + {"id": neworder.df.unique_index(n), "state": np.full(n, neworder.mpi.RANK)} + ).set_index("id") + + #!constructor! + + # !step! + def step(self) -> None: + # generate some movement + neworder.df.transition(self, self.s, self.p, self.pop, "state") + + # send emigrants to other processes + for s in range(neworder.mpi.SIZE): + if s != neworder.mpi.RANK: + emigrants = self.pop[self.pop.state == s] + neworder.log("sending %d emigrants to %d" % (len(emigrants), s)) + neworder.mpi.COMM.send(emigrants, dest=s) + + # remove the emigrants + self.pop = self.pop[self.pop.state == neworder.mpi.RANK] + + # receive immigrants + for s in range(neworder.mpi.SIZE): + if s != neworder.mpi.RANK: + immigrants = neworder.mpi.COMM.recv(source=s) + if len(immigrants): + neworder.log( + "received %d immigrants from %d" % (len(immigrants), s) + ) + self.pop = pd.concat((self.pop, immigrants)) + + # !step! + + # !check! + def check(self) -> bool: + # Ensure we haven't lost (or gained) anybody + totals = neworder.mpi.COMM.gather(len(self.pop), root=0) + if totals: + if sum(totals) != self.n * neworder.mpi.SIZE: + return False + # And check each process only has individuals that it should have + out_of_place = neworder.mpi.COMM.gather( + len(self.pop[self.pop.state != neworder.mpi.RANK]) + ) + if out_of_place and any(out_of_place): + return False + return True + + # !check! + + # !finalise! 
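# A sketch of the same population exchange as step() above using a single
# collective. This is an alternative design, not the example's code: alltoall
# sends element i of the list to rank i (including self), removing any risk
# of blocking-send deadlock if payloads outgrow MPI's eager buffering, which
# the paired send/recv loops above implicitly rely on. Assumes `pop` has a
# "state" column holding each row's destination rank, as in this model.
from mpi4py import MPI
import pandas as pd

def exchange(comm: MPI.Comm, pop: pd.DataFrame) -> pd.DataFrame:
    outbound = [pop[pop.state == r] for r in range(comm.Get_size())]
    return pd.concat(comm.alltoall(outbound))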
+ def finalise(self) -> None: + # process 0 assembles all the data and prints a summary + pops = neworder.mpi.COMM.gather(self.pop, root=0) + if pops: + pop = pd.concat(pops) + neworder.log( + "State counts (total %d):\n%s" + % (len(pop), pop["state"].value_counts().to_string()) + ) + + # !finalise! diff --git a/examples/people/model.py b/examples/people/model.py index 7066701d..bd2987d0 100644 --- a/examples/people/model.py +++ b/examples/people/model.py @@ -1,14 +1,14 @@ - """ model.py: Population Microsimulation - births, deaths and migration by age, gender and ethnicity """ import time from datetime import date -import neworder from population import Population -#neworder.verbose() +import neworder + +# neworder.verbose() # input data initial_population = "examples/people/E08000021_MSOA11_2011.csv" @@ -22,13 +22,17 @@ timeline = neworder.CalendarTimeline(date(2011, 1, 1), date(2051, 1, 1), 1, "y") # create the model -population = Population(timeline, initial_population, fertility_rate_data, mortality_rate_data, in_migration_rate_data, out_migration_rate_data) +population = Population( + timeline, + initial_population, + fertility_rate_data, + mortality_rate_data, + in_migration_rate_data, + out_migration_rate_data, +) # run the model start = time.time() ok = neworder.run(population) neworder.log("run time = %.2fs" % (time.time() - start)) assert ok - - - diff --git a/examples/people/population.py b/examples/people/population.py index 323e441d..e0015b53 100644 --- a/examples/people/population.py +++ b/examples/people/population.py @@ -3,163 +3,212 @@ """ import os -import pandas as pd # type: ignore -import numpy as np -import neworder +import numpy as np +import pandas as pd # type: ignore import pyramid -class Population(neworder.Model): - def __init__(self, - timeline: neworder.Timeline, - population_file: str, - fertility_file: str, - mortality_file: str, - in_migration_file: str, - out_migration_file: str): - - super().__init__(timeline, neworder.MonteCarlo.deterministic_identical_stream) - - # extract the local authority code from the filename - self.lad = os.path.basename(population_file).split("_")[0] - - self.population = pd.read_csv(population_file) - self.population.set_index(neworder.df.unique_index(len(self.population)), inplace=True, drop=True) - - # these datasets use a multiindex of age, gender and ethnicity - # out migration is a hazard rate - # in migration is the intensity of a Poisson process (not a hazard on existing residents!) 
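# The distinction drawn in the comment above, in sampling terms: an
# out-migration hazard applies per existing individual (a Bernoulli draw per
# row), while an in-migration intensity generates wholly new arrivals as
# Poisson counts, independent of the current population. A numpy sketch with
# made-up rates (the model itself uses mc.hazard and mc.counts):
import numpy as np

rng = np.random.default_rng(0)
dt, n_existing = 1.0, 10_000
out_hazard, in_intensity = 0.02, 150.0

leavers = rng.uniform(size=n_existing) < -np.expm1(-out_hazard * dt)  # per person
arrivals = rng.poisson(in_intensity * dt)  # count of new people for the area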
- self.fertility = pd.read_csv(fertility_file, index_col=[0,1,2]) - self.mortality = pd.read_csv(mortality_file, index_col=[0,1,2]) - self.in_migration = pd.read_csv(in_migration_file, index_col=[0,1,2]) - self.out_migration = pd.read_csv(out_migration_file, index_col=[0,1,2]) - - # make gender and age categorical - self.population.DC1117EW_C_AGE = self.population.DC1117EW_C_AGE.astype("category") - self.population.DC1117EW_C_SEX = self.population.DC1117EW_C_SEX.astype("category") - - # actual age is randomised within the bound of the category (NB category values are age +1) - self.population["Age"] = self.population.DC1117EW_C_AGE.astype(int) - self.mc.ustream(len(self.population)) - - self.fig = None - self.plot_pyramid() - - def step(self) -> None: - self.births() - self.deaths() - self.migrations() - self.age() - - def finalise(self) -> None: - pass - - def age(self) -> None: - # Increment age by timestep and update census age category (used for ASFR/ASMR lookup) - # NB census age category max value is 86 (=85 or over) - self.population.Age = self.population.Age + 1 # NB self.timeline.dt wont be exactly 1 as based on an average length year of 365.2475 days - # reconstruct census age group - self.population.DC1117EW_C_AGE = np.clip(np.ceil(self.population.Age), 1, 86).astype(int) - - def births(self) -> None: - # First consider only females - females = self.population[self.population.DC1117EW_C_SEX == 2].copy() - - # Now map the appropriate fertility rate to each female - # might be a more efficient way of generating this array - rates = females.join(self.fertility, on=["NewEthpop_ETH", "DC1117EW_C_SEX", "DC1117EW_C_AGE"])["Rate"].values - # Then randomly determine if a birth occurred - h = self.mc.hazard(rates * self.timeline.dt) - - # The babies are a clone of the new mothers, with with changed PID, reset age and randomised gender (keeping location and ethnicity) - newborns = females[h == 1].copy() - newborns.set_index(neworder.df.unique_index(len(newborns)), inplace=True, drop=True) - newborns.Age = self.mc.ustream(len(newborns)) - 1.0 # born within the *next* 12 months (ageing step has yet to happen) - newborns.DC1117EW_C_AGE = 1 # this is 0-1 in census category - newborns.DC1117EW_C_SEX = 1 + self.mc.hazard(0.5, len(newborns)).astype(int) # 1=M, 2=F - - self.population = pd.concat((self.population, newborns)) - - def deaths(self) -> None: - # Map the appropriate mortality rate to each person - # might be a more efficient way of generating this array - rates = self.population.join(self.mortality, on=["NewEthpop_ETH", "DC1117EW_C_SEX", "DC1117EW_C_AGE"])["Rate"] - - # Then randomly determine if a death occurred - h = self.mc.hazard(rates.values * self.timeline.dt) - - # Finally remove deceased from table - self.population = self.population[h!=1] - - def migrations(self) -> None: - - # immigration: - # - sample counts of migrants according to intensity - # - append result to population - - self.in_migration["count"] = self.mc.counts(self.in_migration.Rate.values, self.timeline.dt) - h_in = self.in_migration.loc[self.in_migration.index.repeat(self.in_migration["count"])].drop(["Rate", "count"], axis=1) - h_in = h_in.reset_index().set_index(neworder.df.unique_index(len(h_in))) - h_in["Area"] = self.lad - # randomly sample exact age according to age group - h_in["Age"] = h_in.DC1117EW_C_AGE - self.mc.ustream(len(h_in)) - - # internal emigration: - out_rates = self.population.join(self.out_migration, on=["NewEthpop_ETH", "DC1117EW_C_SEX", "DC1117EW_C_AGE"])["Rate"].values - h_out = 
self.mc.hazard(out_rates * self.timeline.dt) - # add incoming & remove outgoing migrants - self.population = pd.concat((self.population[h_out!=1], h_in)) - - # record net migration - self.in_out = (len(h_in), h_out.sum()) - - def mean_age(self) -> float: - return self.population.Age.mean() - - def gender_split(self) -> float: - # this is % female - return self.population.DC1117EW_C_SEX.mean() - 1.0 - - def size(self) -> int: - return len(self.population) - - def check(self) -> bool: - """ State of the nation """ - # check no duplicated unique indices - if len(self.population[self.population.index.duplicated(keep=False)]): - neworder.log("Duplicate indices found") - return False - # Valid ETH, SEX, AGE - if not np.array_equal(sorted(self.population.DC1117EW_C_SEX.unique()), [1,2]): - neworder.log("invalid gender value") - return False - if min(self.population.DC1117EW_C_AGE.unique().astype(int)) < 1 or \ - max(self.population.DC1117EW_C_AGE.unique().astype(int)) > 86: - neworder.log("invalid categorical age value") - return False - # this can go below zero for cat 86+ - if (self.population.DC1117EW_C_AGE - self.population.Age).max() >= 1.0: - neworder.log("invalid fractional age value") - return False - - neworder.log("check OK: time={} size={} mean_age={:.2f}, pct_female={:.2f} net_migration={} ({}-{})" \ - .format(self.timeline.time.date(), self.size(), self.mean_age(), 100.0 * self.gender_split(), - self.in_out[0] - self.in_out[1], self.in_out[0], self.in_out[1])) - - # if all is ok, plot the data - self.plot_pyramid() - - return True # Faith - - def plot_pyramid(self) -> None: - a = np.arange(86) - s = self.population.groupby(by=["DC1117EW_C_SEX", "DC1117EW_C_AGE"])["DC1117EW_C_SEX"].count() - m = s[s.index.isin([1], level="DC1117EW_C_SEX")].values - f = s[s.index.isin([2], level="DC1117EW_C_SEX")].values - - if self.fig is None: - self.fig, self.axes, self.mbar, self.fbar = pyramid.plot(a, m, f) - else: - # NB self.timeline.time is now the time at the *end* of the timestep since this is called from check() (as opposed to step()) - self.mbar, self.fbar = pyramid.update(str(self.timeline.time.year), self.fig, self.axes, self.mbar, self.fbar, a, m, f) +import neworder +class Population(neworder.Model): + def __init__( + self, + timeline: neworder.Timeline, + population_file: str, + fertility_file: str, + mortality_file: str, + in_migration_file: str, + out_migration_file: str, + ): + super().__init__(timeline, neworder.MonteCarlo.deterministic_identical_stream) + + # extract the local authority code from the filename + self.lad = os.path.basename(population_file).split("_")[0] + + self.population = pd.read_csv(population_file) + self.population.set_index( + neworder.df.unique_index(len(self.population)), inplace=True, drop=True + ) + + # these datasets use a multiindex of age, gender and ethnicity + # out migration is a hazard rate + # in migration is the intensity of a Poisson process (not a hazard on existing residents!) 
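# A little further below, actual ages are randomised within their census
# category: category values are age+1, so subtracting a U(0,1) draw places
# each person uniformly within the matching year of age. A standalone
# illustration with hypothetical category values:
import numpy as np

rng = np.random.default_rng(0)
category = np.array([1, 1, 86])  # census age group (age+1), 86 means "85+"
age = category.astype(float) - rng.uniform(size=3)
assert np.all((age > category - 1) & (age <= category))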
+ self.fertility = pd.read_csv(fertility_file, index_col=[0, 1, 2]) + self.mortality = pd.read_csv(mortality_file, index_col=[0, 1, 2]) + self.in_migration = pd.read_csv(in_migration_file, index_col=[0, 1, 2]) + self.out_migration = pd.read_csv(out_migration_file, index_col=[0, 1, 2]) + + # make gender and age categorical + self.population.DC1117EW_C_AGE = self.population.DC1117EW_C_AGE.astype( + "category" + ) + self.population.DC1117EW_C_SEX = self.population.DC1117EW_C_SEX.astype( + "category" + ) + + # actual age is randomised within the bound of the category (NB category values are age +1) + self.population["Age"] = self.population.DC1117EW_C_AGE.astype( + int + ) - self.mc.ustream(len(self.population)) + + self.fig = None + self.plot_pyramid() + + def step(self) -> None: + self.births() + self.deaths() + self.migrations() + self.age() + + def finalise(self) -> None: + pass + + def age(self) -> None: + # Increment age by timestep and update census age category (used for ASFR/ASMR lookup) + # NB census age category max value is 86 (=85 or over) + self.population.Age = ( + self.population.Age + 1 + ) # NB self.timeline.dt wont be exactly 1 as based on an average length year of 365.2475 days + # reconstruct census age group + self.population.DC1117EW_C_AGE = np.clip( + np.ceil(self.population.Age), 1, 86 + ).astype(int) + + def births(self) -> None: + # First consider only females + females = self.population[self.population.DC1117EW_C_SEX == 2].copy() + + # Now map the appropriate fertility rate to each female + # might be a more efficient way of generating this array + rates = females.join( + self.fertility, on=["NewEthpop_ETH", "DC1117EW_C_SEX", "DC1117EW_C_AGE"] + )["Rate"].values + # Then randomly determine if a birth occurred + h = self.mc.hazard(rates * self.timeline.dt) + + # The babies are a clone of the new mothers, with with changed PID, reset age and randomised gender (keeping location and ethnicity) + newborns = females[h == 1].copy() + newborns.set_index( + neworder.df.unique_index(len(newborns)), inplace=True, drop=True + ) + newborns.Age = ( + self.mc.ustream(len(newborns)) - 1.0 + ) # born within the *next* 12 months (ageing step has yet to happen) + newborns.DC1117EW_C_AGE = 1 # this is 0-1 in census category + newborns.DC1117EW_C_SEX = 1 + self.mc.hazard(0.5, len(newborns)).astype( + int + ) # 1=M, 2=F + + self.population = pd.concat((self.population, newborns)) + + def deaths(self) -> None: + # Map the appropriate mortality rate to each person + # might be a more efficient way of generating this array + rates = self.population.join( + self.mortality, on=["NewEthpop_ETH", "DC1117EW_C_SEX", "DC1117EW_C_AGE"] + )["Rate"] + + # Then randomly determine if a death occurred + h = self.mc.hazard(rates.values * self.timeline.dt) + + # Finally remove deceased from table + self.population = self.population[h != 1] + + def migrations(self) -> None: + # immigration: + # - sample counts of migrants according to intensity + # - append result to population + + self.in_migration["count"] = self.mc.counts( + self.in_migration.Rate.values, self.timeline.dt + ) + h_in = self.in_migration.loc[ + self.in_migration.index.repeat(self.in_migration["count"]) + ].drop(["Rate", "count"], axis=1) + h_in = h_in.reset_index().set_index(neworder.df.unique_index(len(h_in))) + h_in["Area"] = self.lad + # randomly sample exact age according to age group + h_in["Age"] = h_in.DC1117EW_C_AGE - self.mc.ustream(len(h_in)) + + # internal emigration: + out_rates = self.population.join( + self.out_migration, 
on=["NewEthpop_ETH", "DC1117EW_C_SEX", "DC1117EW_C_AGE"] + )["Rate"].values + h_out = self.mc.hazard(out_rates * self.timeline.dt) + # add incoming & remove outgoing migrants + self.population = pd.concat((self.population[h_out != 1], h_in)) + + # record net migration + self.in_out = (len(h_in), h_out.sum()) + + def mean_age(self) -> float: + return self.population.Age.mean() + + def gender_split(self) -> float: + # this is % female + return self.population.DC1117EW_C_SEX.mean() - 1.0 + + def size(self) -> int: + return len(self.population) + + def check(self) -> bool: + """State of the nation""" + # check no duplicated unique indices + if len(self.population[self.population.index.duplicated(keep=False)]): + neworder.log("Duplicate indices found") + return False + # Valid ETH, SEX, AGE + if not np.array_equal(sorted(self.population.DC1117EW_C_SEX.unique()), [1, 2]): + neworder.log("invalid gender value") + return False + if ( + min(self.population.DC1117EW_C_AGE.unique().astype(int)) < 1 + or max(self.population.DC1117EW_C_AGE.unique().astype(int)) > 86 + ): + neworder.log("invalid categorical age value") + return False + # this can go below zero for cat 86+ + if (self.population.DC1117EW_C_AGE - self.population.Age).max() >= 1.0: + neworder.log("invalid fractional age value") + return False + + neworder.log( + "check OK: time={} size={} mean_age={:.2f}, pct_female={:.2f} net_migration={} ({}-{})".format( + self.timeline.time.date(), + self.size(), + self.mean_age(), + 100.0 * self.gender_split(), + self.in_out[0] - self.in_out[1], + self.in_out[0], + self.in_out[1], + ) + ) + + # if all is ok, plot the data + self.plot_pyramid() + + return True # Faith + + def plot_pyramid(self) -> None: + a = np.arange(86) + s = self.population.groupby( + by=["DC1117EW_C_SEX", "DC1117EW_C_AGE"], observed=False + )["DC1117EW_C_SEX"].count() + m = s[s.index.isin([1], level="DC1117EW_C_SEX")].values + f = s[s.index.isin([2], level="DC1117EW_C_SEX")].values + + if self.fig is None: + self.fig, self.axes, self.mbar, self.fbar = pyramid.plot(a, m, f) + else: + # NB self.timeline.time is now the time at the *end* of the timestep since this is called from check() (as opposed to step()) + self.mbar, self.fbar = pyramid.update( + str(self.timeline.time.year), + self.fig, + self.axes, + self.mbar, + self.fbar, + a, + m, + f, + ) diff --git a/examples/people/pyramid.py b/examples/people/pyramid.py index 9ace22d5..b0a8a335 100644 --- a/examples/people/pyramid.py +++ b/examples/people/pyramid.py @@ -1,58 +1,64 @@ """ pyramid plots """ from __future__ import annotations + from typing import Any -import numpy as np + import matplotlib.pyplot as plt # type: ignore -import matplotlib.animation as anim # type: ignore +import numpy as np + # see https://stackoverflow.com/questions/27694221/using-python-libraries-to-plot-two-horizontal-bar-charts-sharing-same-y-axis -def plot(ages: np.ndarray[np.int64, np.dtype[np.int64]], - males: np.ndarray[np.float64, np.dtype[np.float64]], - females: np.ndarray[np.float64, np.dtype[np.float64]]) -> tuple[plt.Figure, plt.Axes, Any, Any]: - - xmax = 4000 #max(max(males), max(females)) - - fig, axes = plt.subplots(ncols=2, sharey=True) - plt.gca().set_ylim([min(ages),max(ages)+1]) - fig.suptitle("2011") - axes[0].set(title='Males') - axes[0].set(xlim=[0, xmax]) - axes[1].set(title='Females') - axes[1].set(xlim=[0, xmax]) - axes[0].yaxis.tick_right() - #axes[1].set(yticks=ages) - axes[0].invert_xaxis() - for ax in axes.flat: - ax.margins(0.03) - fig.tight_layout() - 
fig.subplots_adjust(wspace=0.125) - mbar = axes[0].barh(ages, males, align='center', color='blue') - fbar = axes[1].barh(ages, females, align='center', color='red') - plt.pause(0.1) - plt.ion() - #plt.savefig("./pyramid2011.png") - - return fig, axes, mbar, fbar - -def update(title: str, - fig: plt.Figure, - axes: plt.Axes, - mbar: Any, - fbar: Any, - ages: np.ndarray[np.int64, np.dtype[np.int64]], - males: np.ndarray[np.float64, np.dtype[np.float64]], - females: np.ndarray[np.float64, np.dtype[np.float64]]) -> tuple[Any, Any]: - for rect, h in zip(mbar, males): - rect.set_width(h) - for rect, h in zip(fbar, females): - rect.set_width(h) - - fig.suptitle(title) - #plt.savefig("./pyramid%s.png" % title) - plt.pause(0.1) - return mbar, fbar +def plot( + ages: np.ndarray[np.int64, np.dtype[np.int64]], + males: np.ndarray[np.float64, np.dtype[np.float64]], + females: np.ndarray[np.float64, np.dtype[np.float64]], +) -> tuple[plt.Figure, plt.Axes, Any, Any]: + xmax = 4000 # max(max(males), max(females)) + + fig, axes = plt.subplots(ncols=2, sharey=True) + plt.gca().set_ylim([min(ages), max(ages) + 1]) + fig.suptitle("2011") + axes[0].set(title="Males") + axes[0].set(xlim=[0, xmax]) + axes[1].set(title="Females") + axes[1].set(xlim=[0, xmax]) + axes[0].yaxis.tick_right() + # axes[1].set(yticks=ages) + axes[0].invert_xaxis() + for ax in axes.flat: + ax.margins(0.03) + fig.tight_layout() + fig.subplots_adjust(wspace=0.125) + mbar = axes[0].barh(ages, males, align="center", color="blue") + fbar = axes[1].barh(ages, females, align="center", color="red") + plt.pause(0.1) + plt.ion() + # plt.savefig("./pyramid2011.png") + + return fig, axes, mbar, fbar + + +def update( + title: str, + fig: plt.Figure, + axes: plt.Axes, + mbar: Any, + fbar: Any, + ages: np.ndarray[np.int64, np.dtype[np.int64]], + males: np.ndarray[np.float64, np.dtype[np.float64]], + females: np.ndarray[np.float64, np.dtype[np.float64]], +) -> tuple[Any, Any]: + for rect, h in zip(mbar, males): + rect.set_width(h) + for rect, h in zip(fbar, females): + rect.set_width(h) + + fig.suptitle(title) + # plt.savefig("./pyramid%s.png" % title) + plt.pause(0.1) + return mbar, fbar + # def hist(a): # plt.hist(a, bins=range(120)) # plt.show() - diff --git a/examples/riskpaths/data.py b/examples/riskpaths/data.py index 7910e78e..161a4488 100644 --- a/examples/riskpaths/data.py +++ b/examples/riskpaths/data.py @@ -1,25 +1,30 @@ +from enum import Enum import numpy as np -from enum import Enum + # classification UNION_STATE class UnionState(Enum): - NEVER_IN_UNION = 0 - FIRST_UNION_PERIOD1 = 1 - FIRST_UNION_PERIOD2 = 2 - AFTER_FIRST_UNION = 3 - SECOND_UNION = 4 - AFTER_SECOND_UNION = 5 + NEVER_IN_UNION = 0 + FIRST_UNION_PERIOD1 = 1 + FIRST_UNION_PERIOD2 = 2 + AFTER_FIRST_UNION = 3 + SECOND_UNION = 4 + AFTER_SECOND_UNION = 5 + class Parity(Enum): - CHILDLESS = 0 - PREGNANT = 1 + CHILDLESS = 0 + PREGNANT = 1 + +def partition( + start: float, finish: float, step: float = 1.0 +) -> np.ndarray[np.float64, np.dtype[np.float64]]: + """Helper function to return an inclusive equal-spaced range, i.e. finish will be the last element""" + # ensure finish is always included + return np.append(np.arange(start, finish, step), finish) -def partition(start: float, finish: float, step: float=1.) -> np.ndarray[np.float64, np.dtype[np.float64]]: - """ Helper function to return an inclusive equal-spaced range, i.e. 
finish will be the last element """ - # ensure finish is always included - return np.append(np.arange(start, finish, step), finish) # Dynamics parameters @@ -51,14 +56,44 @@ def partition(start: float, finish: float, step: float=1.) -> np.ndarray[np.floa # }; # f(AgeintState) -p_preg = np.array([0.0, 0.2869, 0.7591, 0.8458, 0.8167, 0.6727, 0.5105, 0.4882, 0.2562, 0.2597, 0.1542, 0.0]) +p_preg = np.array( + [ + 0.0, + 0.2869, + 0.7591, + 0.8458, + 0.8167, + 0.6727, + 0.5105, + 0.4882, + 0.2562, + 0.2597, + 0.1542, + 0.0, + ] +) # // Age baseline for 1st union formation # double AgeBaselineForm1[AGEINT_STATE] = { # 0, 0.030898, 0.134066, 0.167197, 0.165551, 0.147390, 0.108470, 0.080378, 0.033944, 0.045454, 0.040038, 0, # }; -p_u1f = np.array([0.0, 0.030898, 0.134066, 0.167197, 0.165551, 0.147390, 0.108470, 0.080378, 0.033944, 0.045454, 0.040038, 0.0]) +p_u1f = np.array( + [ + 0.0, + 0.030898, + 0.134066, + 0.167197, + 0.165551, + 0.147390, + 0.108470, + 0.080378, + 0.033944, + 0.045454, + 0.040038, + 0.0, + ] +) # union1 lasts at least 3 years min_u1 = 3.0 @@ -82,7 +117,9 @@ def partition(start: float, finish: float, step: float=1.) -> np.ndarray[np.floa # currently need to modify above to have equal spacing union_delta_t = 2.0 # 1 3 5 7 9 11 13 -r_u2f = np.array([0.1995702, 0.1353028, 0.1099149, 0.1099149, 0.0261186, 0.0261186, 0.0456905]) +r_u2f = np.array( + [0.1995702, 0.1353028, 0.1099149, 0.1099149, 0.0261186, 0.0261186, 0.0456905] +) # Something wrong here: more data than dims # // Union Duration Baseline of Dissolution @@ -93,5 +130,9 @@ def partition(start: float, finish: float, step: float=1.) -> np.ndarray[np.floa # }; # 1 3 5 7 9 11 13 -r_diss2 = np.array([[0.0096017, 0.0199994, 0.0199994, 0.0199994, 0.0213172, 0.0150836, 0.0110791], - [0.0370541, 0.0370541, 0.012775, 0.012775, 0.012775, 0.0661157, 0.0661157]]) +r_diss2 = np.array( + [ + [0.0096017, 0.0199994, 0.0199994, 0.0199994, 0.0213172, 0.0150836, 0.0110791], + [0.0370541, 0.0370541, 0.012775, 0.012775, 0.012775, 0.0661157, 0.0661157], + ] +) diff --git a/examples/riskpaths/model.py b/examples/riskpaths/model.py index fa7965ea..c717561b 100644 --- a/examples/riskpaths/model.py +++ b/examples/riskpaths/model.py @@ -16,13 +16,13 @@ available at www.statcan.gc.ca/microsimulation/modgen/modgen-eng.htm' """ -import neworder -from data import max_age from riskpaths import RiskPaths from visualisation import plot +import neworder + # serial mode -#neworder.verbose() +# neworder.verbose() population_size = 100000 diff --git a/examples/riskpaths/riskpaths.py b/examples/riskpaths/riskpaths.py index b503971b..b02d16b4 100644 --- a/examples/riskpaths/riskpaths.py +++ b/examples/riskpaths/riskpaths.py @@ -1,12 +1,14 @@ """ RiskPaths model """ +import data import numpy as np import pandas as pd # type: ignore -import neworder # dynamics data -from data import UnionState, Parity -import data +from data import Parity, UnionState + +import neworder + # !ctor! 
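# Example usage of the partition() helper defined in data.py above: unlike a
# bare np.arange, the endpoint is always included, which matters when the
# values are age-band boundaries. The arguments are illustrative:
import numpy as np
from data import partition

assert np.array_equal(partition(15.0, 40.0, 5.0),
                      [15.0, 20.0, 25.0, 30.0, 35.0, 40.0])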
class RiskPaths(neworder.Model): diff --git a/examples/riskpaths/visualisation.py b/examples/riskpaths/visualisation.py index c139031b..36cc4770 100644 --- a/examples/riskpaths/visualisation.py +++ b/examples/riskpaths/visualisation.py @@ -1,21 +1,32 @@ +from data import max_age, min_age from matplotlib import pyplot as plt # type: ignore + import neworder -from data import min_age, max_age -def plot(model: neworder.Model) -> None: - bins=range(int(min_age),int(max_age)+1) +def plot(model: neworder.Model) -> None: + bins = range(int(min_age), int(max_age) + 1) - b = [ model.population.T_Union1Start, + b = [ + model.population.T_Union1Start, model.population.T_Union1End, model.population.T_Union2Start, - model.population.T_Union2End ] + model.population.T_Union2End, + ] - plt.hist(b, bins=bins, stacked=True) - plt.hist(model.population.TimeOfPregnancy, bins, color='purple') - plt.legend(["Union 1 starts", "Union 1 ends", "Union 2 starts", "Union 2 ends", "Pregnancy"]) - plt.title("Age distribution of first pregnancy, dependent on union state") - plt.xlabel("Age (y)") - plt.ylabel("Frequency") - #plt.savefig("./docs/examples/img/riskpaths.png", dpi=80) - plt.show() + plt.hist(b, bins=bins, stacked=True) + plt.hist(model.population.TimeOfPregnancy, bins, color="purple") + plt.legend( + [ + "Union 1 starts", + "Union 1 ends", + "Union 2 starts", + "Union 2 ends", + "Pregnancy", + ] + ) + plt.title("Age distribution of first pregnancy, dependent on union state") + plt.xlabel("Age (y)") + plt.ylabel("Frequency") + # plt.savefig("./docs/examples/img/riskpaths.png", dpi=80) + plt.show() diff --git a/examples/schelling/model.py b/examples/schelling/model.py index 6b197d93..3367b73f 100644 --- a/examples/schelling/model.py +++ b/examples/schelling/model.py @@ -1,11 +1,12 @@ import numpy as np -import neworder from schelling import Schelling -#neworder.verbose() +import neworder + +# neworder.verbose() # category 0 is empty -gridsize = (480,360) +gridsize = (480, 360) categories = np.array([0.36, 0.12, 0.12, 0.4]) # normalise if necessary # categories = categories / sum(categories) diff --git a/examples/schelling/schelling.py b/examples/schelling/schelling.py index daebd89a..2d1f9d8b 100644 --- a/examples/schelling/schelling.py +++ b/examples/schelling/schelling.py @@ -1,91 +1,103 @@ from __future__ import annotations -import numpy as np -import neworder import matplotlib.pyplot as plt # type: ignore -from matplotlib.image import AxesImage # type: ignore +import numpy as np from matplotlib import colors # type: ignore +from matplotlib.image import AxesImage # type: ignore -class Schelling(neworder.Model): - def __init__(self, - timeline: neworder.Timeline, - gridsize: tuple[int, int], - categories: np.ndarray[np.float64, np.dtype[np.float64]], - similarity: float) -> None: - # NB missing this line can cause memory corruption - super().__init__(timeline, neworder.MonteCarlo.deterministic_identical_stream) - - # category 0 is empty cell - self.ncategories = len(categories) - # randomly sample initial population according to category weights - init_pop = self.mc.sample(np.prod(gridsize), categories).reshape(gridsize) - self.sat = np.empty(gridsize, dtype=int) - self.similarity = similarity - - self.domain = neworder.StateGrid(init_pop, neworder.Edge.CONSTRAIN) - - self.fig, self.img = self.__init_visualisation() - - def step(self) -> None: - - # start with empty cells being satisfied - self.sat = (self.domain.state == 0) - - # !count! 
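# What the count_neighbours calls below compute, sketched as a plain numpy
# 8-cell (Moore neighbourhood) count of cells matching a predicate. This is
# a conceptual sketch, not the StateGrid implementation, and it assumes
# CONSTRAIN edges can be treated as zero padding for counting purposes.
import numpy as np

def count_neighbours_sketch(state: np.ndarray, predicate) -> np.ndarray:
    mask = predicate(state).astype(int)
    padded = np.pad(mask, 1)
    counts = np.zeros_like(mask)
    for dx in (-1, 0, 1):
        for dy in (-1, 0, 1):
            if dx or dy:
                counts += padded[1 + dx:1 + dx + mask.shape[0],
                                 1 + dy:1 + dy + mask.shape[1]]
    return counts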
- # count all neighbours, scaling by acceptable similarity ratio - n_any = self.domain.count_neighbours(lambda x: x>0) * self.similarity - - for c in range(1,self.ncategories): - # count neighbour with a specific state - n_cat = self.domain.count_neighbours(lambda x: x==c) - self.sat = np.logical_or(self.sat, np.logical_and(n_cat > n_any, self.domain.state == c)) - # !count! - - n_unsat = np.sum(~self.sat) - - pop = self.domain.state.copy() - - free = list(zip(*np.where(pop == 0))) - for src in zip(*np.where(~self.sat)): - # pick a random destination - r = self.mc.raw() % len(free) - dest = free[r] - pop[dest] = pop[src] - pop[src] = 0 - free[r] = src - - self.domain.state = pop - - neworder.log("step %d %.4f%% unsatisfied" % (self.timeline.index, 100.0 * n_unsat / pop.size)) - - self.__update_visualisation() - - # !halt! - # finish early if everyone satisfied - if n_unsat == 0: - # set the halt flag in the runtime - self.halt() - # since the timeline is open-ended we need to explicitly call finalise - self.finalise() - # !halt! - - def finalise(self) -> None: - plt.pause(5.0) - - def __init_visualisation(self) -> tuple[plt.Figure, AxesImage]: - plt.ion() - - cmap = colors.ListedColormap(['white', 'red', 'blue', 'green', 'yellow'][:self.ncategories]) - - fig = plt.figure(constrained_layout=True, figsize=(8,6)) - img = plt.imshow(self.domain.state.T, cmap=cmap) - plt.axis('off') - fig.canvas.mpl_connect('key_press_event', lambda event: self.halt() if event.key == "q" else None) - fig.canvas.flush_events() +import neworder - return fig, img - def __update_visualisation(self) -> None: - self.img.set_array(self.domain.state.T) - # plt.savefig("/tmp/schelling%04d.png" % self.timeline.index, dpi=80) - self.fig.canvas.flush_events() +class Schelling(neworder.Model): + def __init__( + self, + timeline: neworder.Timeline, + gridsize: tuple[int, int], + categories: np.ndarray[np.float64, np.dtype[np.float64]], + similarity: float, + ) -> None: + # NB missing this line can cause memory corruption + super().__init__(timeline, neworder.MonteCarlo.deterministic_identical_stream) + + # category 0 is empty cell + self.ncategories = len(categories) + # randomly sample initial population according to category weights + init_pop = self.mc.sample(np.prod(gridsize), categories).reshape(gridsize) + self.sat = np.empty(gridsize, dtype=int) + self.similarity = similarity + + self.domain = neworder.StateGrid(init_pop, neworder.Edge.CONSTRAIN) + + self.fig, self.img = self.__init_visualisation() + + def step(self) -> None: + # start with empty cells being satisfied + self.sat = self.domain.state == 0 + + # !count! + # count all neighbours, scaling by acceptable similarity ratio + n_any = self.domain.count_neighbours(lambda x: x > 0) * self.similarity + + for c in range(1, self.ncategories): + # count neighbour with a specific state + n_cat = self.domain.count_neighbours(lambda x: x == c) + self.sat = np.logical_or( + self.sat, np.logical_and(n_cat > n_any, self.domain.state == c) + ) + # !count! + + n_unsat = np.sum(~self.sat) + + pop = self.domain.state.copy() + + free = list(zip(*np.where(pop == 0))) + for src in zip(*np.where(~self.sat)): + # pick a random destination + r = self.mc.raw() % len(free) + dest = free[r] + pop[dest] = pop[src] + pop[src] = 0 + free[r] = src + + self.domain.state = pop + + neworder.log( + "step %d %.4f%% unsatisfied" + % (self.timeline.index, 100.0 * n_unsat / pop.size) + ) + + self.__update_visualisation() + + # !halt! 
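        # (note: halt() below only stops the timestep loop; with an
        # open-ended timeline the model never reaches a natural end, so
        # finalise() is not invoked automatically and is called explicitly)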
+ # finish early if everyone satisfied + if n_unsat == 0: + # set the halt flag in the runtime + self.halt() + # since the timeline is open-ended we need to explicitly call finalise + self.finalise() + # !halt! + + def finalise(self) -> None: + plt.pause(5.0) + + def __init_visualisation(self) -> tuple[plt.Figure, AxesImage]: + plt.ion() + + cmap = colors.ListedColormap( + ["white", "red", "blue", "green", "yellow"][: self.ncategories] + ) + + fig = plt.figure(constrained_layout=True, figsize=(8, 6)) + img = plt.imshow(self.domain.state.T, cmap=cmap) + plt.axis("off") + fig.canvas.mpl_connect( + "key_press_event", lambda event: self.halt() if event.key == "q" else None + ) + fig.canvas.flush_events() + + return fig, img + + def __update_visualisation(self) -> None: + self.img.set_array(self.domain.state.T) + # plt.savefig("/tmp/schelling%04d.png" % self.timeline.index, dpi=80) + self.fig.canvas.flush_events() diff --git a/examples/wolf_sheep/model.py b/examples/wolf_sheep/model.py index b797ebcd..89ed8ebb 100644 --- a/examples/wolf_sheep/model.py +++ b/examples/wolf_sheep/model.py @@ -1,37 +1,32 @@ +from wolf_sheep import WolfSheep import neworder as no -from wolf_sheep import WolfSheep -#no.verbose() +# no.verbose() -assert no.mpi.RANK == 0 and no.mpi.SIZE == 1, "this example should only be run in serial mode" +assert ( + no.mpi.RANK == 0 and no.mpi.SIZE == 1 +), "this example should only be run in serial mode" params = { - "grid": { - "width": 100, - "height": 100 - }, - "wolves": { - "starting_population": 100, - "reproduce": 0.05, - "speed": 2.5, - "speed_variance": 0.05, - "gain_from_food": 20 - }, - "sheep": { - "starting_population": 300, - "reproduce": 0.04, - "speed": 0.9, - "speed_variance": 0.02, - "gain_from_food": 4 - }, - "grass": { - "regrowth_time": 12 - } + "grid": {"width": 100, "height": 100}, + "wolves": { + "starting_population": 100, + "reproduce": 0.05, + "speed": 2.5, + "speed_variance": 0.05, + "gain_from_food": 20, + }, + "sheep": { + "starting_population": 300, + "reproduce": 0.04, + "speed": 0.9, + "speed_variance": 0.02, + "gain_from_food": 4, + }, + "grass": {"regrowth_time": 12}, } m = WolfSheep(params) no.run(m) - - diff --git a/examples/wolf_sheep/wolf_sheep.py b/examples/wolf_sheep/wolf_sheep.py index 391ef2d7..52e92743 100644 --- a/examples/wolf_sheep/wolf_sheep.py +++ b/examples/wolf_sheep/wolf_sheep.py @@ -1,268 +1,335 @@ - from typing import Any, Tuple + +import matplotlib.pyplot as plt # type: ignore import numpy as np import pandas as pd # type: ignore -import neworder as no -import matplotlib.pyplot as plt # type: ignore +import neworder as no WOLF_COLOUR = "black" SHEEP_COLOUR = "red" GRASS_COLOUR = "green" -class WolfSheep(no.Model): - - def __init__(self, params: dict[str, Any]) -> None: - - super().__init__(no.LinearTimeline(0.0, 1.0), no.MonteCarlo.deterministic_independent_stream) - # hard-coded to unit timestep - self.width = params["grid"]["width"] - self.height = params["grid"]["height"] - self.domain = no.Space(np.array([0,0]), np.array([self.width, self.height]), edge=no.Edge.WRAP) - - n_wolves = params["wolves"]["starting_population"] - n_sheep =params["sheep"]["starting_population"] - - self.wolf_reproduce = params["wolves"]["reproduce"] - self.sheep_reproduce = params["sheep"]["reproduce"] - self.init_wolf_speed = params["wolves"]["speed"] - self.init_sheep_speed = params["sheep"]["speed"] - self.wolf_speed_stddev = np.sqrt(params["wolves"]["speed_variance"]) - self.sheep_speed_stddev = np.sqrt(params["sheep"]["speed_variance"]) - - 
self.wolf_gain_from_food = params["wolves"]["gain_from_food"] - self.sheep_gain_from_food = params["sheep"]["gain_from_food"] - self.grass_regrowth_time = params["grass"]["regrowth_time"] - - ncells = self.width * self.height - self.grass = pd.DataFrame( - index=pd.Index(name="id", data=no.df.unique_index(ncells)), - data={ - "x": np.tile(np.arange(self.width) + 0.5, self.height), - "y": np.repeat(np.arange(self.height) + 0.5, self.width), - # 50% initial probability of being fully grown, other states uniform - "countdown": self.mc.sample(ncells, [0.5] + [0.5/(self.grass_regrowth_time-1)]*(self.grass_regrowth_time-1)) - } - ) - - self.wolves = pd.DataFrame( - index=pd.Index(name="id", data=no.df.unique_index(n_wolves)), - data={ - "x": self.mc.ustream(n_wolves) * self.width, - "y": self.mc.ustream(n_wolves) * self.height, - "speed": self.init_wolf_speed, - "energy": (self.mc.ustream(n_wolves) + self.mc.ustream(n_wolves)) * self.wolf_gain_from_food - } - ) - self.__assign_cell(self.wolves) - - self.sheep = pd.DataFrame( - index=pd.Index(name="id", data=no.df.unique_index(n_sheep)), - data={ - "x": self.mc.ustream(n_sheep) * self.width, - "y": self.mc.ustream(n_sheep) * self.height, - "speed": self.init_sheep_speed, - "energy": (self.mc.ustream(n_sheep) + self.mc.ustream(n_sheep)) * self.sheep_gain_from_food - } - ) - self.__assign_cell(self.sheep) - - self.wolf_pop = [len(self.wolves)] - self.sheep_pop = [len(self.sheep)] - self.grass_prop = [100.0 * len(self.grass[self.grass.countdown==0])/len(self.grass)] - self.wolf_speed = [self.wolves.speed.mean()] - self.sheep_speed = [self.sheep.speed.mean()] - self.wolf_speed_var = [self.wolves.speed.var()] - self.sheep_speed_var = [self.sheep.speed.var()] - self.t = [self.timeline.index] - - (self.ax_g, self.ax_w, self.ax_s, - self.ax_t1, self.ax_wt, self.ax_st, - self.ax_t2, self.ax_gt, - self.ax_t3, self.ax_ws, self.b_ws, - self.ax_t4, self.ax_ss, self.b_ss) = self.__init_plot() - - # no.log(self.wolves) - # no.log(self.sheep) - # no.log(self.grass) - self.paused = False - - # seed numpy random generator using our generator (for reproducible normal samples) - self.npgen = np.random.default_rng(self.mc.raw()) - - def step(self) -> None: - - # step each population - self.__step_grass() - self.__step_wolves() - self.__step_sheep() - - def check(self) -> bool: - # record data - self.t.append(self.timeline.index) - self.wolf_pop.append(len(self.wolves)) - self.sheep_pop.append(len(self.sheep)) - self.grass_prop.append(100.0 * len(self.grass[self.grass.countdown==0])/len(self.grass)) - self.wolf_speed.append(self.wolves.speed.mean()) - self.sheep_speed.append(self.sheep.speed.mean()) - self.wolf_speed_var.append(self.wolves.speed.var()) - self.sheep_speed_var.append(self.sheep.speed.var()) - - self.__update_plot() - - if self.wolves.empty and self.sheep.empty: - no.log("Wolves and sheep have died out") - self.halt() - return True - - def __step_grass(self) -> None: - # grow grass - self.grass.countdown = np.clip(self.grass.countdown-1, 0, None) - - def __step_wolves(self) -> None: - # move wolves (wrapped) and update cell - vx = (2 * self.mc.ustream(len(self.wolves)) - 1.0) * self.wolves.speed - vy = (2 * self.mc.ustream(len(self.wolves)) - 1.0) * self.wolves.speed - (self.wolves.x, self.wolves.y), _ = self.domain.move((self.wolves.x, self.wolves.y), (vx, vy), 1.0, ungroup=True) - - # half of energy (initially) is consumed by moving - self.wolves.energy -= 0.5 + 0.5 * self.wolves.speed / self.init_wolf_speed - self.__assign_cell(self.wolves) - - # eat 
sheep if available - diners = self.wolves.loc[self.wolves.cell.isin(self.sheep.cell)] - self.wolves.loc[self.wolves.cell.isin(self.sheep.cell), "energy"] += self.wolf_gain_from_food - # NB *all* the sheep in cells with wolves get eaten (or at least killed) - self.sheep = self.sheep[~self.sheep.cell.isin(diners.cell)] - - # remove dead - self.wolves = self.wolves[self.wolves.energy >= 0] - - # breed - m = self.mc.hazard(self.wolf_reproduce, len(self.wolves)) - self.wolves.loc[m == 1, "energy"] /= 2 - cubs = self.wolves[m == 1].copy().set_index(no.df.unique_index(int(sum(m)))) - # evolve speed/burn rate from mother + random factor (don't allow to go <=0) should probably be lognormal - cubs.speed = np.maximum(cubs.speed + self.npgen.normal(0.0, self.wolf_speed_stddev, len(cubs)), 0.1) - self.wolves = pd.concat((self.wolves, cubs)) - - def __step_sheep(self) -> None: - # move sheep randomly (wrapped) - vx = (2 * self.mc.ustream(len(self.sheep)) - 1.0) * self.sheep.speed - vy = (2 * self.mc.ustream(len(self.sheep)) - 1.0) * self.sheep.speed - (self.sheep.x, self.sheep.y), _ = self.domain.move((self.sheep.x, self.sheep.y), (vx, vy), 1.0, ungroup=True) - self.sheep.energy -= 0.5 + 0.5 * self.sheep.speed / self.init_sheep_speed - self.__assign_cell(self.sheep) - - # eat grass if available - grass_available = self.grass.loc[self.sheep.cell] - self.sheep.energy += (grass_available.countdown.values == 0) * self.sheep_gain_from_food - self.grass.loc[self.sheep.cell, "countdown"] = self.grass.loc[self.sheep.cell, "countdown"].apply(lambda c: self.grass_regrowth_time if c == 0 else c) - - # remove dead - self.sheep = self.sheep[self.sheep.energy >= 0] - - # breed - m = self.mc.hazard(self.sheep_reproduce, len(self.sheep)) - self.sheep.loc[m == 1, "energy"] /= 2 - lambs = self.sheep[m == 1].copy().set_index(no.df.unique_index(int(sum(m)))) - # evolve speed from mother + random factor - lambs.speed = np.maximum(lambs.speed + self.npgen.normal(0.0, self.sheep_speed_stddev, len(lambs)), 0.1) - self.sheep = pd.concat((self.sheep, lambs)) - - def __assign_cell(self, agents: pd.DataFrame) -> None: - # not ints for some reason - agents["cell"] = (agents.x.astype(int) + self.width * agents.y.astype(int)).astype(int) - - def __init_plot(self) -> Tuple[Any, ...]: - - plt.ion() - - self.figs = plt.figure(figsize=(15,5)) - self.figs.suptitle("[q to quit, s to save, f toggles full screen]", y=0.05, x=0.15) - gs = self.figs.add_gridspec(2, 3) - ax0 = self.figs.add_subplot(gs[:, 0]) - ax1 = self.figs.add_subplot(gs[0, 1]) - ax2 = self.figs.add_subplot(gs[1, 1]) - - ax3 = self.figs.add_subplot(gs[0, 2]) - ax4 = self.figs.add_subplot(gs[1, 2]) - - # agent map - ax_g = ax0.imshow(np.flip(self.grass.countdown.values.reshape(self.height, self.width), axis=0), - extent=[0, self.width, 0, self.height], cmap="Greens_r", alpha=0.5) - ax_w = ax0.scatter(self.wolves.x, self.wolves.y, s=6, color=WOLF_COLOUR) - ax_s = ax0.scatter(self.sheep.x, self.sheep.y, s=6, color=SHEEP_COLOUR) - ax0.set_axis_off() - - # wolf and sheep population - ax_wt = ax1.plot(self.t, self.wolf_pop, color=WOLF_COLOUR) - ax_st = ax1.plot(self.t, self.sheep_pop, color=SHEEP_COLOUR) - ax1.set_xlim([0, 100]) - ax1.set_ylim([0, max(self.wolf_pop[0], self.sheep_pop[0])]) - ax1.set_ylabel("Population") - ax1.legend(["Wolves", "Sheep"]) - - # grass - ax_gt = ax2.plot(0, self.grass_prop[0], color=GRASS_COLOUR) - ax2.set_xlim([0, 100]) - ax2.set_ylim([0.0, 100.0]) - ax2.set_ylabel("% fully grown grass") - ax2.set_xlabel("Step") - - # wolf speed distribution - - bins 
= np.linspace(0, self.init_wolf_speed * 3.0,51) - width = self.init_wolf_speed * 3.0 / 50 - _, b_ws, ax_ws = ax3.hist([self.init_wolf_speed], bins=bins, width=width, color=WOLF_COLOUR) - #ax3.set_xlabel("Speed distribution") - ax3.axvline(self.init_wolf_speed) - - # sheep speed distribution - bins = np.linspace(0, self.init_sheep_speed * 3.0,51) - width = self.init_sheep_speed * 3.0 / 50 - _, b_ss, ax_ss = ax4.hist([self.init_sheep_speed], bins=bins, width=width, color=SHEEP_COLOUR) - ax4.set_xlabel("Speed distribution") - ax4.axvline(self.init_sheep_speed) - - plt.tight_layout() - - self.figs.canvas.mpl_connect('key_press_event', lambda event: self.halt() if event.key == "q" else None) - - self.figs.canvas.flush_events() - - return ax_g, ax_w, ax_s, \ - ax1, ax_wt, ax_st, \ - ax2, ax_gt, \ - ax3, ax_ws, b_ws, \ - ax4, ax_ss, b_ss - - - def __update_plot(self) -> None: - self.ax_g.set_data(np.flip(self.grass.countdown.values.reshape(self.height, self.width), axis=0)) - self.ax_w.set_offsets(np.c_[self.wolves.x, self.wolves.y]) - self.ax_s.set_offsets(np.c_[self.sheep.x, self.sheep.y]) - - self.ax_wt[0].set_data(self.t, self.wolf_pop) - self.ax_st[0].set_data(self.t, self.sheep_pop) - self.ax_t1.set_xlim([0,self.t[-1]]) - self.ax_t1.set_ylim([0,max(max(self.wolf_pop), max(self.sheep_pop))]) - - self.ax_gt[0].set_data(self.t, self.grass_prop) - self.ax_t2.set_xlim([0,self.t[-1]]) - - if not self.wolves.empty: - n, bins = np.histogram(self.wolves.speed, bins=self.b_ws) - for rect, h in zip(self.ax_ws, n/len(self.wolves)): - rect.set_height(h) - self.ax_t3.set_ylim([0, max(n/len(self.wolves))]) - - if not self.sheep.empty: - n, bins = np.histogram(self.sheep.speed, bins=self.b_ss) - for rect, h in zip(self.ax_ss, n/len(self.sheep)): - rect.set_height(h) - self.ax_t4.set_ylim([0, max(n/len(self.sheep))]) - - #plt.savefig("/tmp/wolf-sheep%04d.png" % self.timeline.index, dpi=80) - self.figs.canvas.flush_events() +class WolfSheep(no.Model): + def __init__(self, params: dict[str, Any]) -> None: + super().__init__( + no.LinearTimeline(0.0, 1.0), no.MonteCarlo.deterministic_independent_stream + ) + # hard-coded to unit timestep + self.width = params["grid"]["width"] + self.height = params["grid"]["height"] + self.domain = no.Space( + np.array([0, 0]), np.array([self.width, self.height]), edge=no.Edge.WRAP + ) + + n_wolves = params["wolves"]["starting_population"] + n_sheep = params["sheep"]["starting_population"] + + self.wolf_reproduce = params["wolves"]["reproduce"] + self.sheep_reproduce = params["sheep"]["reproduce"] + self.init_wolf_speed = params["wolves"]["speed"] + self.init_sheep_speed = params["sheep"]["speed"] + self.wolf_speed_stddev = np.sqrt(params["wolves"]["speed_variance"]) + self.sheep_speed_stddev = np.sqrt(params["sheep"]["speed_variance"]) + + self.wolf_gain_from_food = params["wolves"]["gain_from_food"] + self.sheep_gain_from_food = params["sheep"]["gain_from_food"] + self.grass_regrowth_time = params["grass"]["regrowth_time"] + + ncells = self.width * self.height + self.grass = pd.DataFrame( + index=pd.Index(name="id", data=no.df.unique_index(ncells)), + data={ + "x": np.tile(np.arange(self.width) + 0.5, self.height), + "y": np.repeat(np.arange(self.height) + 0.5, self.width), + # 50% initial probability of being fully grown, other states uniform + "countdown": self.mc.sample( + ncells, + [0.5] + + [0.5 / (self.grass_regrowth_time - 1)] + * (self.grass_regrowth_time - 1), + ), + }, + ) + + self.wolves = pd.DataFrame( + index=pd.Index(name="id", 
data=no.df.unique_index(n_wolves)), + data={ + "x": self.mc.ustream(n_wolves) * self.width, + "y": self.mc.ustream(n_wolves) * self.height, + "speed": self.init_wolf_speed, + "energy": (self.mc.ustream(n_wolves) + self.mc.ustream(n_wolves)) + * self.wolf_gain_from_food, + }, + ) + self.__assign_cell(self.wolves) + + self.sheep = pd.DataFrame( + index=pd.Index(name="id", data=no.df.unique_index(n_sheep)), + data={ + "x": self.mc.ustream(n_sheep) * self.width, + "y": self.mc.ustream(n_sheep) * self.height, + "speed": self.init_sheep_speed, + "energy": (self.mc.ustream(n_sheep) + self.mc.ustream(n_sheep)) + * self.sheep_gain_from_food, + }, + ) + self.__assign_cell(self.sheep) + + self.wolf_pop = [len(self.wolves)] + self.sheep_pop = [len(self.sheep)] + self.grass_prop = [ + 100.0 * len(self.grass[self.grass.countdown == 0]) / len(self.grass) + ] + self.wolf_speed = [self.wolves.speed.mean()] + self.sheep_speed = [self.sheep.speed.mean()] + self.wolf_speed_var = [self.wolves.speed.var()] + self.sheep_speed_var = [self.sheep.speed.var()] + self.t = [self.timeline.index] + + ( + self.ax_g, + self.ax_w, + self.ax_s, + self.ax_t1, + self.ax_wt, + self.ax_st, + self.ax_t2, + self.ax_gt, + self.ax_t3, + self.ax_ws, + self.b_ws, + self.ax_t4, + self.ax_ss, + self.b_ss, + ) = self.__init_plot() + + # no.log(self.wolves) + # no.log(self.sheep) + # no.log(self.grass) + self.paused = False + + # seed numpy random generator using our generator (for reproducible normal samples) + self.npgen = np.random.default_rng(self.mc.raw()) + + def step(self) -> None: + # step each population + self.__step_grass() + self.__step_wolves() + self.__step_sheep() + + def check(self) -> bool: + # record data + self.t.append(self.timeline.index) + self.wolf_pop.append(len(self.wolves)) + self.sheep_pop.append(len(self.sheep)) + self.grass_prop.append( + 100.0 * len(self.grass[self.grass.countdown == 0]) / len(self.grass) + ) + self.wolf_speed.append(self.wolves.speed.mean()) + self.sheep_speed.append(self.sheep.speed.mean()) + self.wolf_speed_var.append(self.wolves.speed.var()) + self.sheep_speed_var.append(self.sheep.speed.var()) + + self.__update_plot() + + if self.wolves.empty and self.sheep.empty: + no.log("Wolves and sheep have died out") + self.halt() + return True + + def __step_grass(self) -> None: + # grow grass + self.grass.countdown = np.clip(self.grass.countdown - 1, 0, None) + + def __step_wolves(self) -> None: + # move wolves (wrapped) and update cell + vx = (2 * self.mc.ustream(len(self.wolves)) - 1.0) * self.wolves.speed + vy = (2 * self.mc.ustream(len(self.wolves)) - 1.0) * self.wolves.speed + (self.wolves.x, self.wolves.y), _ = self.domain.move( + (self.wolves.x, self.wolves.y), (vx, vy), 1.0, ungroup=True + ) + + # half of energy (initially) is consumed by moving + self.wolves.energy -= 0.5 + 0.5 * self.wolves.speed / self.init_wolf_speed + self.__assign_cell(self.wolves) + + # eat sheep if available + diners = self.wolves.loc[self.wolves.cell.isin(self.sheep.cell)] + self.wolves.loc[ + self.wolves.cell.isin(self.sheep.cell), "energy" + ] += self.wolf_gain_from_food + # NB *all* the sheep in cells with wolves get eaten (or at least killed) + self.sheep = self.sheep[~self.sheep.cell.isin(diners.cell)] + + # remove dead + self.wolves = self.wolves[self.wolves.energy >= 0] + + # breed + m = self.mc.hazard(self.wolf_reproduce, len(self.wolves)) + self.wolves.loc[m == 1, "energy"] /= 2 + cubs = self.wolves[m == 1].copy().set_index(no.df.unique_index(int(sum(m)))) + # evolve speed/burn rate from mother + random 
factor (don't allow to go <=0) should probably be lognormal + cubs.speed = np.maximum( + cubs.speed + self.npgen.normal(0.0, self.wolf_speed_stddev, len(cubs)), 0.1 + ) + self.wolves = pd.concat((self.wolves, cubs)) + + def __step_sheep(self) -> None: + # move sheep randomly (wrapped) + vx = (2 * self.mc.ustream(len(self.sheep)) - 1.0) * self.sheep.speed + vy = (2 * self.mc.ustream(len(self.sheep)) - 1.0) * self.sheep.speed + (self.sheep.x, self.sheep.y), _ = self.domain.move( + (self.sheep.x, self.sheep.y), (vx, vy), 1.0, ungroup=True + ) + self.sheep.energy -= 0.5 + 0.5 * self.sheep.speed / self.init_sheep_speed + self.__assign_cell(self.sheep) + + # eat grass if available + grass_available = self.grass.loc[self.sheep.cell] + self.sheep.energy += ( + grass_available.countdown.values == 0 + ) * self.sheep_gain_from_food + self.grass.loc[self.sheep.cell, "countdown"] = self.grass.loc[ + self.sheep.cell, "countdown" + ].apply(lambda c: self.grass_regrowth_time if c == 0 else c) + + # remove dead + self.sheep = self.sheep[self.sheep.energy >= 0] + + # breed + m = self.mc.hazard(self.sheep_reproduce, len(self.sheep)) + self.sheep.loc[m == 1, "energy"] /= 2 + lambs = self.sheep[m == 1].copy().set_index(no.df.unique_index(int(sum(m)))) + # evolve speed from mother + random factor + lambs.speed = np.maximum( + lambs.speed + self.npgen.normal(0.0, self.sheep_speed_stddev, len(lambs)), + 0.1, + ) + self.sheep = pd.concat((self.sheep, lambs)) + + def __assign_cell(self, agents: pd.DataFrame) -> None: + # not ints for some reason + agents["cell"] = ( + agents.x.astype(int) + self.width * agents.y.astype(int) + ).astype(int) + + def __init_plot(self) -> Tuple[Any, ...]: + plt.ion() + + self.figs = plt.figure(figsize=(15, 5)) + self.figs.suptitle( + "[q to quit, s to save, f toggles full screen]", y=0.05, x=0.15 + ) + gs = self.figs.add_gridspec(2, 3) + ax0 = self.figs.add_subplot(gs[:, 0]) + ax1 = self.figs.add_subplot(gs[0, 1]) + ax2 = self.figs.add_subplot(gs[1, 1]) + + ax3 = self.figs.add_subplot(gs[0, 2]) + ax4 = self.figs.add_subplot(gs[1, 2]) + + # agent map + ax_g = ax0.imshow( + np.flip( + self.grass.countdown.values.reshape(self.height, self.width), axis=0 + ), + extent=[0, self.width, 0, self.height], + cmap="Greens_r", + alpha=0.5, + ) + ax_w = ax0.scatter(self.wolves.x, self.wolves.y, s=6, color=WOLF_COLOUR) + ax_s = ax0.scatter(self.sheep.x, self.sheep.y, s=6, color=SHEEP_COLOUR) + ax0.set_axis_off() + + # wolf and sheep population + ax_wt = ax1.plot(self.t, self.wolf_pop, color=WOLF_COLOUR) + ax_st = ax1.plot(self.t, self.sheep_pop, color=SHEEP_COLOUR) + ax1.set_xlim([0, 100]) + ax1.set_ylim([0, max(self.wolf_pop[0], self.sheep_pop[0])]) + ax1.set_ylabel("Population") + ax1.legend(["Wolves", "Sheep"]) + + # grass + ax_gt = ax2.plot(0, self.grass_prop[0], color=GRASS_COLOUR) + ax2.set_xlim([0, 100]) + ax2.set_ylim([0.0, 100.0]) + ax2.set_ylabel("% fully grown grass") + ax2.set_xlabel("Step") + + # wolf speed distribution + + bins = np.linspace(0, self.init_wolf_speed * 3.0, 51) + width = self.init_wolf_speed * 3.0 / 50 + _, b_ws, ax_ws = ax3.hist( + [self.init_wolf_speed], bins=bins, width=width, color=WOLF_COLOUR + ) + # ax3.set_xlabel("Speed distribution") + ax3.axvline(self.init_wolf_speed) + + # sheep speed distribution + bins = np.linspace(0, self.init_sheep_speed * 3.0, 51) + width = self.init_sheep_speed * 3.0 / 50 + _, b_ss, ax_ss = ax4.hist( + [self.init_sheep_speed], bins=bins, width=width, color=SHEEP_COLOUR + ) + ax4.set_xlabel("Speed distribution") + 
ax4.axvline(self.init_sheep_speed)
+
+        plt.tight_layout()
+
+        self.figs.canvas.mpl_connect(
+            "key_press_event", lambda event: self.halt() if event.key == "q" else None
+        )
+
+        self.figs.canvas.flush_events()
+
+        return (
+            ax_g,
+            ax_w,
+            ax_s,
+            ax1,
+            ax_wt,
+            ax_st,
+            ax2,
+            ax_gt,
+            ax3,
+            ax_ws,
+            b_ws,
+            ax4,
+            ax_ss,
+            b_ss,
+        )
+
+    def __update_plot(self) -> None:
+        self.ax_g.set_data(
+            np.flip(
+                self.grass.countdown.values.reshape(self.height, self.width), axis=0
+            )
+        )
+        self.ax_w.set_offsets(np.c_[self.wolves.x, self.wolves.y])
+        self.ax_s.set_offsets(np.c_[self.sheep.x, self.sheep.y])
+
+        self.ax_wt[0].set_data(self.t, self.wolf_pop)
+        self.ax_st[0].set_data(self.t, self.sheep_pop)
+        self.ax_t1.set_xlim([0, self.t[-1]])
+        self.ax_t1.set_ylim([0, max(max(self.wolf_pop), max(self.sheep_pop))])
+
+        self.ax_gt[0].set_data(self.t, self.grass_prop)
+        self.ax_t2.set_xlim([0, self.t[-1]])
+
+        if not self.wolves.empty:
+            n, bins = np.histogram(self.wolves.speed, bins=self.b_ws)
+            for rect, h in zip(self.ax_ws, n / len(self.wolves)):
+                rect.set_height(h)
+            self.ax_t3.set_ylim([0, max(n / len(self.wolves))])
+
+        if not self.sheep.empty:
+            n, bins = np.histogram(self.sheep.speed, bins=self.b_ss)
+            for rect, h in zip(self.ax_ss, n / len(self.sheep)):
+                rect.set_height(h)
+            self.ax_t4.set_ylim([0, max(n / len(self.sheep))])
+
+        # plt.savefig("/tmp/wolf-sheep%04d.png" % self.timeline.index, dpi=80)
+        self.figs.canvas.flush_events()
diff --git a/mkdocs.yml b/mkdocs.yml
index 44175a9a..2211ce8c 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -4,26 +4,35 @@
 site_author: Andrew P Smith
 site_description: Documentation for the neworder microsimulation framework
 repo_url: https://github.com/virgesmith/neworder
 repo_name: GitHub
-copyright: "© 2017-2023 Andrew P Smith"
-#google_analytics
+copyright: >
+  Copyright © 2017-2024 Andrew P Smith –
+  <a href="#__consent">Change cookie settings</a>

 theme:
   name: material
   palette:
     scheme: default
-    primary: blue grey
+    primary: light blue
     accent: red
   features:
     - content.code.copy
+    - content.code.annotate

 markdown_extensions:
   - toc:
       permalink: "¶"
   - admonition
   - pymdownx.highlight
-  - pymdownx.superfences
+  - pymdownx.superfences:
+      custom_fences:
+        - name: mermaid
+          class: mermaid
+          format: !!python/name:pymdownx.superfences.fence_code_format
   - pymdownx.arithmatex:
       generic: true
+  - footnotes
+  - attr_list
+  - md_in_html

 extra_javascript:
   - js/config.js
@@ -37,7 +46,15 @@
 plugins:
 #      include_dir: examples
   - mkdocs-video
-# - mkpdfs
+extra:
+  consent:
+    title: Cookie consent
+    description: >-
+      This site uses cookies with your permission.
+ actions: + - accept + - manage + - reject nav: - Home: index.md diff --git a/neworder/__init__.py b/neworder/__init__.py index 37da1960..21b25d4a 100644 --- a/neworder/__init__.py +++ b/neworder/__init__.py @@ -3,14 +3,29 @@ __version__ = importlib.metadata.version("neworder") from _neworder_core import ( - Model, - MonteCarlo, - Timeline, - NoTimeline, - LinearTimeline, - NumericTimeline, - CalendarTimeline, - time, df, mpi, log, run, stats, checked, verbose -) # type: ignore -from .domain import Edge, Domain, Space, StateGrid + CalendarTimeline, + LinearTimeline, + Model, + MonteCarlo, + NoTimeline, + NumericTimeline, + Timeline, + checked, + df, + log, + mpi, + run, + stats, + time, + verbose, +) + +# type: ignore +from .domain import Domain, Edge, Space, StateGrid from .mc import as_np + +__all__ = [ + "as_np", + "Domain", + "Space" +] \ No newline at end of file diff --git a/neworder/__init__.pyi b/neworder/__init__.pyi index 8d306569..ba874c30 100644 --- a/neworder/__init__.pyi +++ b/neworder/__init__.pyi @@ -1,25 +1,21 @@ """ A dynamic microsimulation framework"; """ + from __future__ import annotations -import typing -import datetime -import numpy as np -import numpy.typing as npt -import df # type: ignore -import mpi # type: ignore -from . import time -import stats # type: ignore -from .domain import NPFloatArray +import datetime +import typing -date_t = datetime.datetime | datetime.date -FloatArray1d = NPFloatArray | list[float] -NPIntArray = npt.NDArray[np.int64] -IntArray1d = NPIntArray | list[int] +import numpy +import numpy.typing as npt +from . import df, mpi, stats, time +from .domain import Domain, Edge, Space, StateGrid +from .mc import as_np __all__ = [ + "as_np", "CalendarTimeline", "LinearTimeline", "Model", @@ -34,70 +30,42 @@ __all__ = [ "run", "stats", "time", - "verbose" + "verbose", + "Space", + "Domain", + "Edge", + "StateGrid", + "as_np", ] -class Timeline(): - def __init__(self) -> None: ... - def __repr__(self) -> str: - """ - Prints a human-readable representation of the timeline object - """ - @property - def at_end(self) -> bool: - """ - Returns True if the current step is the end of the timeline - - :type: bool - """ - @property - def dt(self) -> float: - """ - Returns the step size size of the timeline - - :type: float - """ - @property - def end(self) -> object: - """ - Returns the time of the end of the timeline - - :type: object - """ - @property - def index(self) -> int: - """ - Returns the index of the current step in the timeline +class CalendarTimeline(Timeline): + """ - :type: int - """ - @property - def nsteps(self) -> int: - """ - Returns the number of steps in the timeline (or -1 if open-ended) + A calendar-based timeline + """ - :type: int + @typing.overload + def __init__( + self, start: datetime.date, end: datetime.date, step: int, unit: str + ) -> None: """ - @property - def start(self) -> object: + Constructs a calendar-based timeline, given start and end dates, an increment specified as a multiple of days, months or years """ - Returns the time of the start of the timeline - - :type: object + @typing.overload + def __init__(self, start: datetime.date, step: int, unit: str) -> None: """ - @property - def time(self) -> object: + Constructs an open-ended calendar-based timeline, given a start date and an increment specified as a multiple of days, months or years. + NB the model will run until the Model.halt() method is explicitly called (from inside the step() method). 
Note also that nsteps() will
+        return -1 for timelines constructed this way
         """
-        Returns the time of the current step in the timeline

-        :type: object
-        """
-    pass
 class LinearTimeline(Timeline):
     """
+    An equally-spaced non-calendar timeline.
     """

+    @typing.overload
     def __init__(self, start: float, end: float, nsteps: int) -> None:
         """
@@ -109,12 +77,55 @@
         Constructs an open-ended timeline given a start value and a step size. NB the model will run until the Model.halt() method is explicitly called (from inside the step() method). Note also that nsteps() will return -1 for timelines constructed this way
         """
-    pass

-class Model():
+
+class Model:
     """
+    The base model class from which all neworder models should be subclassed
     """
-    def __init__(self, timeline: Timeline, seeder: typing.Callable[[int], int] = MonteCarlo.deterministic_independent_stream) -> None:
+
+    class RunState:
+        """
+        Members:
+
+          NOT_STARTED
+
+          RUNNING
+
+          HALTED
+
+          COMPLETED
+        """
+
+        COMPLETED: typing.ClassVar[Model.RunState]  # value = <RunState.COMPLETED: 3>
+        HALTED: typing.ClassVar[Model.RunState]  # value = <RunState.HALTED: 2>
+        NOT_STARTED: typing.ClassVar[
+            Model.RunState
+        ]  # value = <RunState.NOT_STARTED: 0>
+        RUNNING: typing.ClassVar[Model.RunState]  # value = <RunState.RUNNING: 1>
+        __members__: typing.ClassVar[
+            dict[str, Model.RunState]
+        ]  # value = {'NOT_STARTED': <RunState.NOT_STARTED: 0>, 'RUNNING': <RunState.RUNNING: 1>, 'HALTED': <RunState.HALTED: 2>, 'COMPLETED': <RunState.COMPLETED: 3>}
+        def __eq__(self, other: typing.Any) -> bool: ...
+        def __getstate__(self) -> int: ...
+        def __hash__(self) -> int: ...
+        def __index__(self) -> int: ...
+        def __init__(self, value: int) -> None: ...
+        def __int__(self) -> int: ...
+        def __ne__(self, other: typing.Any) -> bool: ...
+        def __repr__(self) -> str: ...
+        def __setstate__(self, state: int) -> None: ...
+        def __str__(self) -> str: ...
+        @property
+        def name(self) -> str: ...
+        @property
+        def value(self) -> int: ...
+
+    COMPLETED: typing.ClassVar[Model.RunState]  # value = <RunState.COMPLETED: 3>
+    HALTED: typing.ClassVar[Model.RunState]  # value = <RunState.HALTED: 2>
+    NOT_STARTED: typing.ClassVar[Model.RunState]  # value = <RunState.NOT_STARTED: 0>
+    RUNNING: typing.ClassVar[Model.RunState]  # value = <RunState.RUNNING: 1>
+    def __init__(self, timeline: Timeline, seeder: typing.Callable = ...) -> None:
         """
         Constructs a model object with a timeline and (optionally) a seeder function for the random stream(s)
         """
@@ -139,7 +150,7 @@
         or input is required from an upstream model. The model can be subsequently resumed by calling the run() function.
         For trapping exceptional/error conditions, prefer to raise an exception, or return False from the Model.check() function
         """
-    def modify(self, r: int) -> None:
+    def modify(self) -> None:
         """
         User-overridable method used to modify state on a per-process basis for multiprocess model runs.
         Default behaviour is to do nothing.
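A minimal sketch of how the Model API above fits together (illustrative only, not part of the patch: the Coin model, its parameters and the logged result are invented for the example; only constructs documented in these stubs are used):

import neworder as no

class Coin(no.Model):
    """Counts heads over ten unit timesteps"""

    def __init__(self) -> None:
        super().__init__(
            no.LinearTimeline(0.0, 10.0, 10),
            no.MonteCarlo.deterministic_independent_stream,
        )
        self.heads = 0

    def step(self) -> None:
        # one uniform [0,1) draw per timestep
        self.heads += int(self.mc.ustream(1)[0] < 0.5)

    def check(self) -> bool:
        # returning False aborts the run; halt() (not needed here) pauses it cleanly
        return True

model = Coin()
no.run(model)
no.log((model.heads, model.run_state))  # e.g. (4, RunState.COMPLETED)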
@@ -154,74 +165,101 @@
     @property
     def mc(self) -> MonteCarlo:
         """
-        The model's Monte-Carlo engine

-        :type: MonteCarlo
+        The model's Monte-Carlo engine
+        """
+    @property
+    def run_state(self) -> Model.RunState:
+        """
+        The model's current state - one of:
+        NOT_STARTED: model has not been run
+        RUNNING: model is in progress
+        HALTED: model has been explicitly halted by calling its halt() method
+        COMPLETED: model has run to the end of its timeline
         """
     @property
     def timeline(self) -> Timeline:
         """
-        The model's timeline object

-        :type: Timeline
+        The model's timeline object
         """
-    pass

-class MonteCarlo():
+
+class MonteCarlo:
     """
+    The model's Monte-Carlo engine with configurable options for parallel execution
     """
+
+    @staticmethod
+    def deterministic_identical_stream() -> int:
+        """
+        Returns a deterministic seed (19937). Input argument is ignored
+        """
+    @staticmethod
+    def deterministic_independent_stream() -> int:
+        """
+        Returns a deterministic seed that is a function of the input (19937+r).
+        The model uses the MPI rank as the input argument, allowing for differently seeded streams in each process
+        """
+    @staticmethod
+    def nondeterministic_stream() -> int:
+        """
+        Returns a random seed from the platform's random_device. Input argument is ignored
+        """
     def __repr__(self) -> str:
         """
         Prints a human-readable representation of the MonteCarlo engine
         """
-    def arrivals(self, lambda_: FloatArray1d, dt: float, n: int, mingap: float) -> NPFloatArray:
+    def arrivals(
+        self, lambda_: npt.NDArray[numpy.float64], dt: float, n: int, mingap: float
+    ) -> npt.NDArray[numpy.float64]:
         """
         Returns an array of n arrays of multiple arrival times from a nonhomogeneous Poisson process (with hazard rate lambda[i], time interval dt),
         with a minimum separation between events of mingap. Sampling uses the Lewis-Shedler "thinning" algorithm.
         The final value of lambda must be zero, and thus arrivals don't always occur, indicated by a value of neworder.time.never()
         The inner dimension of the returned 2d array is governed by the maximum number of arrivals sampled, and will thus vary
         """
-    def counts(self, lambda_: FloatArray1d, dt: float) -> NPIntArray:
+    def counts(
+        self, lambda_: npt.NDArray[numpy.float64], dt: float
+    ) -> npt.NDArray[numpy.int64]:
         """
         Returns an array of simulated arrival counts (within time dt) for each intensity in lambda
         """
-    @staticmethod
-    def deterministic_identical_stream(r: int) -> int:
-        """
-        Returns a deterministic seed (19937). Input argument is ignored
-        """
-    @staticmethod
-    def deterministic_independent_stream(r: int) -> int:
-        """
-        Returns a deterministic seed that is a function of the input (19937+r).
-        The model uses the MPI rank as the input argument, allowing for differently seeded streams in each process
-        """
     @typing.overload
-    def first_arrival(self, lambda_: FloatArray1d, dt: float, n: int) -> NPFloatArray:
+    def first_arrival(
+        self, lambda_: npt.NDArray[numpy.float64], dt: float, n: int, minval: float
+    ) -> npt.NDArray[numpy.float64]:
         """
         Returns an array of length n of first arrival times from a nonhomogeneous Poisson process (with hazard rate lambda[i], time interval dt),
         with a minimum start time of minval.
Sampling uses the Lewis-Shedler "thinning" algorithm.
         If the final value of lambda is zero, no arrival is indicated by a value of neworder.time.never()
         """
     @typing.overload
-    def first_arrival(self, lambda_: FloatArray1d, dt: float, n: int, minval: float) -> NPFloatArray:
+    def first_arrival(
+        self, lambda_: npt.NDArray[numpy.float64], dt: float, n: int
+    ) -> npt.NDArray[numpy.float64]:
         """
         Returns an array of length n of first arrival times from a nonhomogeneous Poisson process (with hazard rate lambda[i], time interval dt),
         with no minimum start time. Sampling uses the Lewis-Shedler "thinning" algorithm.
         If the final value of lambda is zero, no arrival is indicated by a value of neworder.time.never()
         """
     @typing.overload
-    def hazard(self, p: float, n: int) -> NPFloatArray:
+    def hazard(self, p: float, n: int) -> npt.NDArray[numpy.float64]:
         """
         Returns an array of ones (with hazard rate lambda) or zeros of length n
         """
     @typing.overload
-    def hazard(self, p: NPFloatArray) -> NPFloatArray:
+    def hazard(self, p: npt.NDArray[numpy.float64]) -> npt.NDArray[numpy.float64]:
         """
         Returns an array of ones (with hazard rate lambda[i]) or zeros for each element in p
         """
     @typing.overload
-    def next_arrival(self, startingpoints: FloatArray1d, lambda_: FloatArray1d, dt: float) -> NPFloatArray:
+    def next_arrival(
+        self,
+        startingpoints: npt.NDArray[numpy.float64],
+        lambda_: npt.NDArray[numpy.float64],
+        dt: float,
+        relative: bool,
+        minsep: float,
+    ) -> npt.NDArray[numpy.float64]:
         """
         Returns an array of length n of subsequent arrival times from a nonhomogeneous Poisson process (with hazard rate lambda[i], time interval dt),
         with start times given by startingpoints with a minimum offset of minsep. Sampling uses the Lewis-Shedler "thinning" algorithm.
@@ -229,7 +267,13 @@
         If the final value of lambda is zero, no arrival is indicated by a value of neworder.time.never()
         """
     @typing.overload
-    def next_arrival(self, startingpoints: FloatArray1d, lambda_: FloatArray1d, dt: float, relative: bool) -> NPFloatArray:
+    def next_arrival(
+        self,
+        startingpoints: npt.NDArray[numpy.float64],
+        lambda_: npt.NDArray[numpy.float64],
+        dt: float,
+        relative: bool,
+    ) -> npt.NDArray[numpy.float64]:
         """
         Returns an array of length n of subsequent arrival times from a nonhomogeneous Poisson process (with hazard rate lambda[i], time interval dt),
         with start times given by startingpoints. Sampling uses the Lewis-Shedler "thinning" algorithm.
@@ -237,17 +281,17 @@
         If the final value of lambda is zero, no arrival is indicated by a value of neworder.time.never()
         """
     @typing.overload
-    def next_arrival(self, startingpoints: FloatArray1d, lambda_: FloatArray1d, dt: float, relative: bool, minsep: float) -> NPFloatArray:
+    def next_arrival(
+        self,
+        startingpoints: npt.NDArray[numpy.float64],
+        lambda_: npt.NDArray[numpy.float64],
+        dt: float,
+    ) -> npt.NDArray[numpy.float64]:
         """
         Returns an array of length n of subsequent arrival times from a nonhomogeneous Poisson process (with hazard rate lambda[i], time interval dt),
         with start times given by startingpoints. Sampling uses the Lewis-Shedler "thinning" algorithm.
         If the final value of lambda is zero, no arrival is indicated by a value of neworder.time.never()
         """
-    @staticmethod
-    def nondeterministic_stream(r: int) -> int:
-        """
-        Returns a random seed from the platform's random_device. Input argument is ignored
-        """
     def raw(self) -> int:
         """
         Returns a random 64-bit unsigned integer. Useful for seeding other generators.
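The sampling methods above all draw from the model's own stream, so results are reproducible under the chosen seeding strategy. A brief sketch of typical usage (illustrative only, not part of the patch; reuses the model instance from the earlier sketch, and the variable names are invented):

import numpy as np

mc = model.mc                                  # every Model carries its own engine
u = mc.ustream(5)                              # 5 uniform [0,1) variates
events = mc.hazard(0.1, 5)                     # 0/1 Bernoulli draws with p=0.1
waits = mc.stopping(0.1, 5)                    # stopping times at constant hazard 0.1
states = mc.sample(5, np.array([0.25, 0.75]))  # categorical draws weighted 1:3
rng = np.random.default_rng(mc.raw())          # raw() seeding an external generator,
                                               # as the wolf-sheep model above does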
@@ -257,7 +301,9 @@
         Resets the generator using the original seed.
         Use with care, esp in multi-process models with identical streams
         """
-    def sample(self, n: int, cat_weights: NPFloatArray) -> NPIntArray:
+    def sample(
+        self, n: int, cat_weights: npt.NDArray[numpy.float64]
+    ) -> npt.NDArray[numpy.int64]:
         """
         Returns an array of length n containing randomly sampled categorical values, weighted according to cat_weights
         """
@@ -270,63 +316,96 @@
         Returns a hash of the internal state of the generator. Avoids the extra complexity of transmitting variable-length strings over MPI.
         """
     @typing.overload
-    def stopping(self, lambda_: float, n: int) -> NPFloatArray:
+    def stopping(self, lambda_: float, n: int) -> npt.NDArray[numpy.float64]:
         """
         Returns an array of stopping times (with hazard rate lambda) of length n
         """
     @typing.overload
-    def stopping(self, lambda_: NPFloatArray) -> NPFloatArray:
+    def stopping(
+        self, lambda_: npt.NDArray[numpy.float64]
+    ) -> npt.NDArray[numpy.float64]:
         """
         Returns an array of stopping times (with hazard rate lambda[i]) for each element in lambda
         """
-    def ustream(self, n: int) -> NPFloatArray:
+    def ustream(self, n: int) -> npt.NDArray[numpy.float64]:
         """
         Returns an array of uniform random [0,1) variates of length n
         """
-    pass
+
 class NoTimeline(Timeline):
     """
+    An arbitrary one step timeline, for continuous-time models with no explicit (discrete) timeline
     """
+
     def __init__(self) -> None:
         """
         Constructs an arbitrary one step timeline, where the start and end times are undefined and there is a single step of size zero. Useful for continuous-time models
         """
-    pass
+
 class NumericTimeline(Timeline):
     """
+    A custom non-calendar timeline where the user explicitly specifies the time points, which must be monotonically increasing.
     """
-    def __init__(self, times: typing.List[float]) -> None:
+
+    def __init__(self, times: list[float]) -> None:
         """
         Constructs a timeline from an array of time points.
         """
-    pass

-class CalendarTimeline(Timeline):
-    """
-    A calendar-based timeline
-    """
-    @typing.overload
-    def __init__(self, start: date_t, end: date_t, step: int, unit: str) -> None:
+
+class Timeline:
+    def __init__(self) -> None: ...
+    def __repr__(self) -> str:
         """
-        Constructs a calendar-based timeline, given start and end dates, an increment specified as a multiple of days, months or years
+        Prints a human-readable representation of the timeline object
         """
-    @typing.overload
-    def __init__(self, start: date_t, step: int, unit: str) -> None:
+    @property
+    def at_end(self) -> bool:
         """
-        Constructs an open-ended calendar-based timeline, given a start date and an increment specified as a multiple of days, months or years.
-        NB the model will run until the Model.halt() method is explicitly called (from inside the step() method).
Note also that nsteps() will
-        return -1 for timelines constructed this way
+        Returns True if the current step is the end of the timeline
         """
-    pass
+    @property
+    def dt(self) -> float:
+        """
+        Returns the step size of the timeline
+        """
+    @property
+    def end(self) -> typing.Any:
+        """
+        Returns the time of the end of the timeline
+        """
+    @property
+    def index(self) -> int:
+        """
+        Returns the index of the current step in the timeline
+        """
+    @property
+    def nsteps(self) -> int:
+        """
+        Returns the number of steps in the timeline (or -1 if open-ended)
+        """
+    @property
+    def start(self) -> typing.Any:
+        """
+        Returns the time of the start of the timeline
+        """
+    @property
+    def time(self) -> typing.Any:
+        """
+        Returns the time of the current step in the timeline
+        """

 def checked(checked: bool = True) -> None:
     """
     Sets the checked flag, which determines whether the model runs checks during execution
     """
-def log(obj: object) -> None:
+
+def log(obj: typing.Any) -> None:
     """
     The logging function. Prints obj to the console, annotated with process information
     """
+
 def run(model: Model) -> bool:
     """
     Runs the model. If the model has previously run it will resume from the point at which it was given the "halt" instruction. This is useful
@@ -335,12 +414,8 @@
     Returns:
         True if model succeeded, False otherwise
     """
+
 def verbose(verbose: bool = True) -> None:
     """
     Sets the verbose flag, which toggles detailed runtime logs
     """
-
-def as_np(mc: MonteCarlo) -> np.random.Generator:
-    """
-    Returns an adapter enabling the MonteCarlo object to be used with numpy random functionality
-    """
diff --git a/neworder/df.pyi b/neworder/df.pyi
index 4cb8082b..e953aa70 100644
--- a/neworder/df.pyi
+++ b/neworder/df.pyi
@@ -1,27 +1,31 @@
 """
-    Submodule for operations involving direct manipulation of pandas dataframes
+
+Submodule for operations involving direct manipulation of pandas dataframes
 """
+
 from __future__ import annotations
-from typing import TypeVar
-import neworder
-import numpy as np
-# _Shape = typing.Tuple[int, ...]
-T = TypeVar("T")
-nparray = np.ndarray[T, np.dtype[T]]
+import typing

-__all__ = [
-    "testfunc",
-    "transition",
-    "unique_index"
-]
+import numpy
+import numpy.typing as npt

-def testfunc(model: neworder.Model, df: object, colname: str) -> None:
+import neworder
+
+__all__ = ["testfunc", "transition", "unique_index"]
+
+def testfunc(model: neworder.Model, df: typing.Any, colname: str) -> None:
     """
     Test function for direct dataframe manipulation. Results may vary. Do not use.
     """
-def transition(model: neworder.Model, categories: nparray[np.int64], transition_matrix: nparray[np.float64], df: object, colname: str) -> None:
+
+def transition(
+    model: neworder.Model,
+    categories: npt.NDArray[numpy.int64],
+    transition_matrix: npt.NDArray[numpy.float64],
+    df: typing.Any,
+    colname: str,
+) -> None:
     """
     Randomly changes categorical data in a dataframe, according to supplied transition probabilities.
     Args:
@@ -31,7 +35,8 @@
         df: The dataframe, which is modified in-place
         colname: The name of the column in the dataframe
     """
-def unique_index(n: int) -> nparray[np.int64]:
+
+def unique_index(n: int) -> npt.NDArray[numpy.int64]:
     """
     Generates an array of n unique values, even across multiple processes, that can be used to unambiguously index multiple dataframes.
""" diff --git a/neworder/domain.py b/neworder/domain.py index 074a31d7..e6ae0f42 100644 --- a/neworder/domain.py +++ b/neworder/domain.py @@ -3,244 +3,288 @@ """ from __future__ import annotations -from typing import Union, Optional, Any, Callable + from enum import Enum, auto +from typing import Any, Callable, Optional, Union + import numpy as np import numpy.typing as npt from scipy import signal # type: ignore NPFloatArray = npt.NDArray[np.float64] + class Edge(Enum): - """ - Edge behaviour - """ - UNBOUNDED = auto() - WRAP = auto() - CONSTRAIN = auto() - BOUNCE = auto() + """ + Edge behaviour + """ + + UNBOUNDED = auto() + WRAP = auto() + CONSTRAIN = auto() + BOUNCE = auto() class Domain: - """ - Base class for spatial domains. - """ + """ + Base class for spatial domains. + """ - def __init__(self, dim: int, edge: Edge, continuous: bool): - self.__dim = dim - self.__edge = edge - self.__continuous = continuous + def __init__(self, dim: int, edge: Edge, continuous: bool): + self.__dim = dim + self.__edge = edge + self.__continuous = continuous - @property - def dim(self) -> int: - """ The dimension of the spatial domain """ - return self.__dim + @property + def dim(self) -> int: + """The dimension of the spatial domain""" + return self.__dim - @property - def edge(self) -> Edge: - """ The tyoe of edge constraint """ - return self.__edge + @property + def edge(self) -> Edge: + """The tyoe of edge constraint""" + return self.__edge - @property - def continuous(self) -> bool: - """ Whether space is continuous or discrete """ - return self.__continuous + @property + def continuous(self) -> bool: + """Whether space is continuous or discrete""" + return self.__continuous class Space(Domain): - """ - Continuous rectangular n-dimensional finite or infinite domain. - If finite, positioning and/or movement near the domain boundary is - dictated by the `wrap` attribute. 
- """ - - @staticmethod - def unbounded(dim: int) -> Space: - """ Construct an unbounded Space """ - assert dim > 0 - return Space(np.full(dim, -np.inf), np.full(dim, +np.inf), edge=Edge.UNBOUNDED) - - def __init__(self, min: NPFloatArray, max: NPFloatArray, edge: Edge=Edge.CONSTRAIN): - assert len(min) and len(min) == len(max) - super().__init__(len(min), edge, True) - - # Space supports all edge behaviours - assert edge in [Edge.UNBOUNDED, Edge.WRAP, Edge.CONSTRAIN, Edge.BOUNCE] - - assert np.all(max > min) - - self.min = min - self.max = max - - @property - def extent(self) -> tuple[NPFloatArray, NPFloatArray]: - """ The extent of the space in terms of two opposing points """ - return self.min, self.max - - def move(self, positions: Any, - velocities: Any, - delta_t: float, - ungroup: bool=False) -> tuple[NPFloatArray, NPFloatArray]: - """ Returns translated positions AND velocities """ - # group tuples into a single array if necessary - if isinstance(positions, tuple): - positions = np.column_stack(positions) - if isinstance(velocities, tuple): - velocities = np.column_stack(velocities) - - assert positions.dtype == float - assert velocities.dtype == float - assert positions.shape[-1] == self.dim and velocities.shape[-1] == self.dim - if self.edge == Edge.UNBOUNDED: - p = positions + velocities * delta_t - v = velocities - elif self.edge == Edge.CONSTRAIN: - p = positions + velocities * delta_t - v = velocities - hitmin = np.where(p < self.min, 1, 0) - p = np.where(hitmin, self.min, p) - v = np.where(hitmin, 0, v) - hitmax = np.where(p > self.max, 1, 0) - p = np.where(hitmax, self.max, p) - v = np.where(hitmax, 0, v) - elif self.edge == Edge.BOUNCE: - p = positions + velocities * delta_t - v = velocities - hitmin = np.where(p < self.min, 1, 0) - p = np.where(hitmin, 2 * self.min - p, p) - v = np.where(hitmin, -v, v) - hitmax = np.where(p > self.max, 1, 0) - p = np.where(hitmax, 2 * self.max - p, p) - v = np.where(hitmax, -v, v) - else: - p = self.min + np.mod(positions + velocities * delta_t - self.min, self.max - self.min) - v = velocities - - if ungroup: - p = np.split(p, self.dim, axis=1) - v = np.split(v, self.dim, axis=1) - return p, v - - def dists2(self, positions: Union[tuple[NPFloatArray, ...], NPFloatArray], to_points: Optional[NPFloatArray]=None) -> tuple[NPFloatArray, Any]: - """ The squared distance between points and separations along each axis """ - # group tuples into a single array if necessary - if isinstance(positions, tuple): - positions = np.column_stack(positions) - if isinstance(to_points, tuple): - to_points = np.column_stack(to_points) - # distances w.r.t. self if to_points not explicitly specified - if to_points is None: - to_points = positions - assert positions.dtype == float - assert to_points.dtype == float - n = positions.shape[0] - m = to_points.shape[0] - d = positions.shape[1] - d2 = np.zeros((m, n)) - separations: tuple[Any, ...] 
= () - if self.edge != Edge.WRAP: - for i in range(d): - delta = np.tile(positions[:, i], m).reshape((m, n)) - np.repeat(to_points[:, i], n).reshape(m, n) - separations += (delta,) - d2 += delta * delta - else: # wrapped domains need special treatment - distance across an edge may be less than naive distance - for i in range(d): - delta = np.tile(positions[:, i], m).reshape((m, n)) - np.repeat(to_points[:, i], n).reshape(m, n) - r = self.max[i] - self.min[i] - delta = np.where(delta > r / 2, delta - r, delta) - delta = np.where(delta < -r / 2, delta + r, delta) - separations += (delta,) - d2 += delta * delta - - return d2, separations - - def dists(self, positions: Union[tuple[NPFloatArray, ...], NPFloatArray], to_points: Optional[NPFloatArray]=None) -> NPFloatArray: - """ Returns distances between the points""" - return np.sqrt(self.dists2(positions, to_points)[0]) - - def in_range(self, distance: Any, positions: Any, count: Optional[bool]=False) -> NPFloatArray: - """ Returns either indices or counts of points within the specified distance from each point """ - ind = np.where(self.dists2(positions)[0] < distance * distance, 1, 0) - # fill diagonal so as not to include self - TODO how does this work if to_points!=positions - np.fill_diagonal(ind, 0) - return ind if not count else np.sum(ind, axis=1) - - def __repr__(self) -> str: - return "%s dim=%d min=%s max=%s edge=%s" % (self.__class__.__name__, self.dim, self.min, self.max, self.edge) + """ + Continuous rectangular n-dimensional finite or infinite domain. + If finite, positioning and/or movement near the domain boundary is + dictated by the `wrap` attribute. + """ + + @staticmethod + def unbounded(dim: int) -> Space: + """Construct an unbounded Space""" + assert dim > 0 + return Space(np.full(dim, -np.inf), np.full(dim, +np.inf), edge=Edge.UNBOUNDED) + + def __init__( + self, min: NPFloatArray, max: NPFloatArray, edge: Edge = Edge.CONSTRAIN + ): + assert len(min) and len(min) == len(max) + super().__init__(len(min), edge, True) + + # Space supports all edge behaviours + assert edge in [Edge.UNBOUNDED, Edge.WRAP, Edge.CONSTRAIN, Edge.BOUNCE] + + assert np.all(max > min) + + self.min = min + self.max = max + + @property + def extent(self) -> tuple[NPFloatArray, NPFloatArray]: + """The extent of the space in terms of two opposing points""" + return self.min, self.max + + def move( + self, positions: Any, velocities: Any, delta_t: float, ungroup: bool = False + ) -> tuple[NPFloatArray, NPFloatArray]: + """Returns translated positions AND velocities""" + # group tuples into a single array if necessary + if isinstance(positions, tuple): + positions = np.column_stack(positions) + if isinstance(velocities, tuple): + velocities = np.column_stack(velocities) + + assert positions.dtype == float + assert velocities.dtype == float + assert positions.shape[-1] == self.dim and velocities.shape[-1] == self.dim + if self.edge == Edge.UNBOUNDED: + p = positions + velocities * delta_t + v = velocities + elif self.edge == Edge.CONSTRAIN: + p = positions + velocities * delta_t + v = velocities + hitmin = np.where(p < self.min, 1, 0) + p = np.where(hitmin, self.min, p) + v = np.where(hitmin, 0, v) + hitmax = np.where(p > self.max, 1, 0) + p = np.where(hitmax, self.max, p) + v = np.where(hitmax, 0, v) + elif self.edge == Edge.BOUNCE: + p = positions + velocities * delta_t + v = velocities + hitmin = np.where(p < self.min, 1, 0) + p = np.where(hitmin, 2 * self.min - p, p) + v = np.where(hitmin, -v, v) + hitmax = np.where(p > self.max, 1, 0) + p = 
np.where(hitmax, 2 * self.max - p, p) + v = np.where(hitmax, -v, v) + else: + p = self.min + np.mod( + positions + velocities * delta_t - self.min, self.max - self.min + ) + v = velocities + + if ungroup: + p = np.split(p, self.dim, axis=1) + v = np.split(v, self.dim, axis=1) + return p, v + + def dists2( + self, + positions: Union[tuple[NPFloatArray, ...], NPFloatArray], + to_points: Optional[NPFloatArray] = None, + ) -> tuple[NPFloatArray, Any]: + """The squared distance between points and separations along each axis""" + # group tuples into a single array if necessary + if isinstance(positions, tuple): + positions = np.column_stack(positions) + if isinstance(to_points, tuple): + to_points = np.column_stack(to_points) + # distances w.r.t. self if to_points not explicitly specified + if to_points is None: + to_points = positions + assert positions.dtype == float + assert to_points.dtype == float + n = positions.shape[0] + m = to_points.shape[0] + d = positions.shape[1] + d2 = np.zeros((m, n)) + separations: tuple[Any, ...] = () + if self.edge != Edge.WRAP: + for i in range(d): + delta = np.tile(positions[:, i], m).reshape((m, n)) - np.repeat( + to_points[:, i], n + ).reshape(m, n) + separations += (delta,) + d2 += delta * delta + else: # wrapped domains need special treatment - distance across an edge may be less than naive distance + for i in range(d): + delta = np.tile(positions[:, i], m).reshape((m, n)) - np.repeat( + to_points[:, i], n + ).reshape(m, n) + r = self.max[i] - self.min[i] + delta = np.where(delta > r / 2, delta - r, delta) + delta = np.where(delta < -r / 2, delta + r, delta) + separations += (delta,) + d2 += delta * delta + + return d2, separations + + def dists( + self, + positions: Union[tuple[NPFloatArray, ...], NPFloatArray], + to_points: Optional[NPFloatArray] = None, + ) -> NPFloatArray: + """Returns distances between the points""" + return np.sqrt(self.dists2(positions, to_points)[0]) + + def in_range( + self, distance: Any, positions: Any, count: Optional[bool] = False + ) -> NPFloatArray: + """Returns either indices or counts of points within the specified distance from each point""" + ind = np.where(self.dists2(positions)[0] < distance * distance, 1, 0) + # fill diagonal so as not to include self - TODO how does this work if to_points!=positions + np.fill_diagonal(ind, 0) + return ind if not count else np.sum(ind, axis=1) + + def __repr__(self) -> str: + return "%s dim=%d min=%s max=%s edge=%s" % ( + self.__class__.__name__, + self.dim, + self.min, + self.max, + self.edge, + ) + def _bounce(i: int, N: int) -> int: - s = (i // N) % 2 - k = i % N - return N * s + (-1) ** s * k + s = (i // N) % 2 + k = i % N + return N * s + (-1) ** s * k -class StateGrid(Domain): - """ - Discrete rectangular n-dimensional finite grid domain with each cell having an integer state. 
- Allows for counting of neighbours according to the supported edge behaviours: - CONSTRAIN (no neighburs over edge), WRAP (toroidal), BOUNCE (reflect) - """ - - __mode_lookup: dict[Edge, str] = { - Edge.CONSTRAIN: "constant", - Edge.WRAP: "wrap", - Edge.BOUNCE: "reflect" - } - - def __init__(self, initial_values: NPFloatArray, edge: Edge=Edge.CONSTRAIN): - super().__init__(initial_values.ndim, edge, False) - - # StateGrid supports two edge behaviours - if edge not in [Edge.WRAP, Edge.CONSTRAIN, Edge.BOUNCE]: - raise ValueError("edge policy must be one of Edge.WRAP, Edge.CONSTRAIN, Edge.BOUNCE") - - if initial_values.ndim < 1: - raise ValueError("state array must have dimension of 1 or above") - if initial_values.size < 1: - raise ValueError("state array must have size of 1 or above in every dimension") - - self.state = initial_values - - # int neighbour kernel (not including self) - self.kernel = np.ones([3] * self.dim) - self.kernel[(1,) * self.dim] = 0 - - def __get_point(self, p: tuple[int, ...]) -> tuple[int, ...]: - assert len(p) == self.state.ndim, f"dimensionality mismatch: {len(p)} but grid has {self.state.ndim}" - match self.edge: - case Edge.WRAP: - p = tuple(p[i] % self.state.shape[i] for i in range(len(p))) - case Edge.CONSTRAIN: - p = tuple(np.clip(p[i], 0, self.state.shape[i] - 1) for i in range(len(p))) - case Edge.BOUNCE: - p = tuple(_bounce(p[i], self.state.shape[i] - 1) for i in range(len(p))) - return p - - def __getitem__(self, p: tuple[int, ...]) -> Any: - return self.state[self.__get_point(p)] - - def __setitem__(self, p: tuple[int, ...], value: Any) -> None: - self.state[self.__get_point(p)] = value - - def shift(self, position: tuple[int, ...], delta: tuple[int, ...]) -> tuple[int, ...]: - """This translates a point according to the grid's edge behaviour. It does *not* change any state""" - point = self.__get_point(position) - p = tuple(point[i] + delta[i] for i in range(self.dim)) - return self.__get_point(p) - - @property - def extent(self) -> Any: - """ The extent of the space in terms of two opposing points """ - return self.state.shape - - def count_neighbours(self, indicator: Callable[[float], bool]=lambda x: x == 1) -> NPFloatArray: - """ Counts neighbouring cells with a state indicated by supplied indicator function """ - - ind: NPFloatArray = np.array([indicator(x) for x in self.state]).astype(int) # automagically preserves shape - # pad with boundary according to edge policy - bounded = np.pad(ind, pad_width=1, mode=self.__mode_lookup[self.edge]) # type: ignore # bug? - - # count neighbours, drop padding, covert to int - count = signal.convolve(bounded, self.kernel, mode="same", method="direct")[(slice(1, -1),) * self.dim].astype(int) - - return count +class StateGrid(Domain): + """ + Discrete rectangular n-dimensional finite grid domain with each cell having an integer state. 
+    Allows for counting of neighbours according to the supported edge behaviours:
+    CONSTRAIN (no neighbours over edge), WRAP (toroidal), BOUNCE (reflect)
+    """
+
+    __mode_lookup: dict[Edge, str] = {
+        Edge.CONSTRAIN: "constant",
+        Edge.WRAP: "wrap",
+        Edge.BOUNCE: "reflect",
+    }
+
+    def __init__(self, initial_values: NPFloatArray, edge: Edge = Edge.CONSTRAIN):
+        super().__init__(initial_values.ndim, edge, False)
+
+        # StateGrid supports three edge behaviours
+        if edge not in [Edge.WRAP, Edge.CONSTRAIN, Edge.BOUNCE]:
+            raise ValueError(
+                "edge policy must be one of Edge.WRAP, Edge.CONSTRAIN, Edge.BOUNCE"
+            )
+
+        if initial_values.ndim < 1:
+            raise ValueError("state array must have dimension of 1 or above")
+        if initial_values.size < 1:
+            raise ValueError(
+                "state array must have size of 1 or above in every dimension"
+            )
+
+        self.state = initial_values
+
+        # int neighbour kernel (not including self)
+        self.kernel = np.ones([3] * self.dim)
+        self.kernel[(1,) * self.dim] = 0
+
+    def __get_point(self, p: tuple[int, ...]) -> tuple[int, ...]:
+        assert (
+            len(p) == self.state.ndim
+        ), f"dimensionality mismatch: {len(p)} but grid has {self.state.ndim}"
+        match self.edge:
+            case Edge.WRAP:
+                p = tuple(p[i] % self.state.shape[i] for i in range(len(p)))
+            case Edge.CONSTRAIN:
+                p = tuple(
+                    np.clip(p[i], 0, self.state.shape[i] - 1) for i in range(len(p))
+                )
+            case Edge.BOUNCE:
+                p = tuple(_bounce(p[i], self.state.shape[i] - 1) for i in range(len(p)))
+        return p
+
+    def __getitem__(self, p: tuple[int, ...]) -> Any:
+        return self.state[self.__get_point(p)]
+
+    def __setitem__(self, p: tuple[int, ...], value: Any) -> None:
+        self.state[self.__get_point(p)] = value
+
+    def shift(
+        self, position: tuple[int, ...], delta: tuple[int, ...]
+    ) -> tuple[int, ...]:
+        """This translates a point according to the grid's edge behaviour. It does *not* change any state"""
+        point = self.__get_point(position)
+        p = tuple(point[i] + delta[i] for i in range(self.dim))
+        return self.__get_point(p)
+
+    @property
+    def extent(self) -> Any:
+        """The extent of the space in terms of two opposing points"""
+        return self.state.shape
+
+    def count_neighbours(
+        self, indicator: Callable[[float], bool] = lambda x: x == 1
+    ) -> NPFloatArray:
+        """Counts neighbouring cells with a state indicated by supplied indicator function"""
+
+        ind: NPFloatArray = np.array([indicator(x) for x in self.state]).astype(
+            int
+        )  # automagically preserves shape
+        # pad with boundary according to edge policy
+        bounded = np.pad(ind, pad_width=1, mode=self.__mode_lookup[self.edge])  # type: ignore # bug?
+
+        # count neighbours, drop padding, convert to int
+        count = signal.convolve(bounded, self.kernel, mode="same", method="direct")[
+            (slice(1, -1),) * self.dim
+        ].astype(int)
+
+        return count
diff --git a/neworder/geospatial.py b/neworder/geospatial.py
index f75c052e..f45511bc 100644
--- a/neworder/geospatial.py
+++ b/neworder/geospatial.py
@@ -1,68 +1,83 @@
 from __future__ import annotations
+
 from typing import Any, Generator

 try:
-  import networkx as nx  # type: ignore[import]
-  import osmnx as ox  # type: ignore[import]
-  from shapely.ops import linemerge  # type: ignore[import]
-  from shapely.geometry import LineString, MultiLineString, Polygon  # type: ignore[import]
-  import geopandas as gpd  # type: ignore[import]
+    import geopandas as gpd  # type: ignore[import]
+    import networkx as nx  # type: ignore[import]
+    import osmnx as ox  # type: ignore[import]
+    from shapely.geometry import (  # type: ignore[import]
+        LineString,
+        MultiLineString,
+        Polygon,
+    )
+    from shapely.ops import linemerge  # type: ignore[import]
 except ImportError:
-  raise ImportError("""optional dependencies are not installed.
+    raise ImportError(
+        """optional dependencies are not installed.
 Reinstalling neworder with the geospatial option should fix this:
-pip install neworder[geospatial]""")
+pip install neworder[geospatial]"""
+    )


 class GeospatialGraph:
-  """
-  Spatial domains on Earth's surface that are defined by graphs/networks.
-  Use of this class requires "geospatial" extras: pip install neworder[geospatial]
-  """
-
-  def __init__(self, G: nx.Graph, crs: str | None = None) -> None:
-    if crs:
-      self.__graph = ox.project_graph(G, to_crs=crs)
-    else:
-      self.__graph = G
-    self.__nodes, self.__edges = ox.graph_to_gdfs(self.__graph)
+    """
+    Spatial domains on Earth's surface that are defined by graphs/networks.
+ Use of this class requires "geospatial" extras: pip install neworder[geospatial] + """ - @classmethod - def from_point(cls, point: tuple[float, float], *args: Any, crs: str | None = None, **kwargs: Any) -> GeospatialGraph: - G = ox.graph_from_point(point, *args, **kwargs) - return cls(G, crs) + def __init__(self, G: nx.Graph, crs: str | None = None) -> None: + if crs: + self.__graph = ox.project_graph(G, to_crs=crs) + else: + self.__graph = G + self.__nodes, self.__edges = ox.graph_to_gdfs(self.__graph) - @property - def crs(self) -> str: - return self.__graph.graph["crs"] + @classmethod + def from_point( + cls, + point: tuple[float, float], + *args: Any, + crs: str | None = None, + **kwargs: Any, + ) -> GeospatialGraph: + G = ox.graph_from_point(point, *args, **kwargs) + return cls(G, crs) - @property - def graph(self) -> nx.MultiDiGraph | nx.Graph: - return self.__graph + @property + def crs(self) -> str: + return self.__graph.graph["crs"] - @property - def all_nodes(self) -> gpd.GeoDataFrame: - return self.__nodes + @property + def graph(self) -> nx.MultiDiGraph | nx.Graph: + return self.__graph - @property - def all_edges(self) -> gpd.GeoDataFrame: - return self.__edges + @property + def all_nodes(self) -> gpd.GeoDataFrame: + return self.__nodes - def edges_to(self, node: int) -> Generator[list[tuple[int, int]], None, None]: - return self.__graph.in_edges(node) + @property + def all_edges(self) -> gpd.GeoDataFrame: + return self.__edges - def edges_from(self, node: int) -> Generator[list[tuple[int, int]], None, None]: - return self.__graph.out_edges(node) + def edges_to(self, node: int) -> Generator[list[tuple[int, int]], None, None]: + return self.__graph.in_edges(node) - def shortest_path(self, origin: int, dest: int, **kwargs: Any) -> LineString: - nodes = nx.shortest_path(self.__graph, origin, dest, **kwargs) - route_segments = [self.__edges.loc[(nodes[i], nodes[i+1], 0), "geometry"] for i in range(len(nodes) - 1)] - return linemerge(MultiLineString(route_segments)) + def edges_from(self, node: int) -> Generator[list[tuple[int, int]], None, None]: + return self.__graph.out_edges(node) - def subgraph(self, origin: int, **kwargs: Any) -> nx.Graph: - return nx.ego_graph(self.__graph, origin, **kwargs) + def shortest_path(self, origin: int, dest: int, **kwargs: Any) -> LineString: + nodes = nx.shortest_path(self.__graph, origin, dest, **kwargs) + route_segments = [ + self.__edges.loc[(nodes[i], nodes[i + 1], 0), "geometry"] + for i in range(len(nodes) - 1) + ] + return linemerge(MultiLineString(route_segments)) - def isochrone(self, origin: int, **kwargs: Any) -> Polygon: - subgraph = nx.ego_graph(self.__graph, origin, **kwargs) - nodes, _ = ox.graph_to_gdfs(subgraph) - return nodes.geometry.unary_union.convex_hull + def subgraph(self, origin: int, **kwargs: Any) -> nx.Graph: + return nx.ego_graph(self.__graph, origin, **kwargs) + def isochrone(self, origin: int, **kwargs: Any) -> Polygon: + subgraph = nx.ego_graph(self.__graph, origin, **kwargs) + nodes, _ = ox.graph_to_gdfs(subgraph) + return nodes.geometry.unary_union.convex_hull diff --git a/neworder/mc.py b/neworder/mc.py index bdc79733..e143c3d7 100644 --- a/neworder/mc.py +++ b/neworder/mc.py @@ -3,13 +3,14 @@ def as_np(mc: MonteCarlo) -> np.random.Generator: - """ - Returns an adapter enabling the MonteCarlo object to be used with numpy random functionality - """ - class _NpAdapter(np.random.BitGenerator): - def __init__(self, rng: MonteCarlo): - super().__init__(0) - self.rng = rng - self.rng.init_bitgen(self.capsule) # 
type: ignore - - return np.random.Generator(_NpAdapter(mc)) # type: ignore + """ + Returns an adapter enabling the MonteCarlo object to be used with numpy random functionality + """ + + class _NpAdapter(np.random.BitGenerator): + def __init__(self, rng: MonteCarlo): + super().__init__(0) + self.rng = rng + self.rng.init_bitgen(self.capsule) # type: ignore + + return np.random.Generator(_NpAdapter(mc)) # type: ignore diff --git a/neworder/mpi.pyi b/neworder/mpi.pyi index 91dc953a..10a53deb 100644 --- a/neworder/mpi.pyi +++ b/neworder/mpi.pyi @@ -1,20 +1,17 @@ """ - Submodule for basic MPI environment discovery. -""" -from __future__ import annotations -from typing import Any -__all__ = [ - "COMM", - "RANK", - "SIZE" -] +Submodule for basic MPI environment discovery, containing the following attributes: -COMM: Any -"""The MPI communicator if neworder has been installed with the parallel option, otherwise None.""" +RANK: the process rank (0 in serial mode) +SIZE: the number of processes (1 in serial mode) +COMM: the MPI communicator (None in serial mode) +""" + +from __future__ import annotations -RANK: int -"""The MPI process rank. 0 in serial mode.""" +import mpi4py.MPI -SIZE: int -"""The number of MPI processes. 1 in serial mode""" +__all__ = ["COMM", "RANK", "SIZE"] +COMM: mpi4py.MPI.Intracomm # value = +RANK: int = 0 +SIZE: int = 1 diff --git a/neworder/stats.pyi b/neworder/stats.pyi index eef2b8aa..e495bd88 100644 --- a/neworder/stats.pyi +++ b/neworder/stats.pyi @@ -1,21 +1,21 @@ """ - Submodule for statistical functions + +Submodule for statistical functions """ + from __future__ import annotations -from typing import overload, TypeVar -import numpy as np -#_Shape = typing.Tuple[int, ...] -__all__ = [ - "logistic", - "logit" -] +import typing + +import numpy +import numpy.typing as npt -T = TypeVar("T") -nparray = np.ndarray[T, np.dtype[T]] +__all__ = ["logistic", "logit"] -@overload -def logistic(x: nparray[np.float64]) -> nparray[np.float64]: +@typing.overload +def logistic( + x: npt.NDArray[numpy.float64], x0: float, k: float +) -> npt.NDArray[numpy.float64]: """ Computes the logistic function on the supplied values. Args: @@ -24,29 +24,30 @@ def logistic(x: nparray[np.float64]) -> nparray[np.float64]: x0: the midpoint location Returns: The function values + """ - +@typing.overload +def logistic(x: npt.NDArray[numpy.float64], k: float) -> npt.NDArray[numpy.float64]: + """ Computes the logistic function with x0=0 on the supplied values. Args: x: The input values. k: The growth rate Returns: The function values + """ - +@typing.overload +def logistic(x: npt.NDArray[numpy.float64]) -> npt.NDArray[numpy.float64]: + """ Computes the logistic function with k=1 and x0=0 on the supplied values. Args: x: The input values. Returns: The function values """ -@overload -def logistic(x: nparray[np.float64], k: float) -> nparray[np.float64]: - pass -@overload -def logistic(x: nparray[np.float64], x0: float, k: float) -> nparray[np.float64]: - pass -def logit(x: nparray[np.float64]) -> nparray[np.float64]: + +def logit(x: npt.NDArray[numpy.float64]) -> npt.NDArray[numpy.float64]: """ Computes the logit function on the supplied values. Args: diff --git a/neworder/time.pyi b/neworder/time.pyi index 5e40b55c..d14dab69 100644 --- a/neworder/time.pyi +++ b/neworder/time.pyi @@ -1,18 +1,19 @@ """ - Temporal values and comparison. + +Temporal values and comparison, including the attributes: +NEVER: a value that compares unequal to any value, including itself. 
+DISTANT_PAST: a value that compares less than any other value but itself and NEVER
+FAR_FUTURE: a value that compares greater than any other value but itself and NEVER
 """
+
 from __future__ import annotations
+
 import typing
-import numpy as np
-import numpy.typing as npt
-__all__ = [
-    "DISTANT_PAST",
-    "FAR_FUTURE",
-    "NEVER",
-    "isnever"
-]
+import numpy
+import numpy.typing as npt
 
+__all__ = ["DISTANT_PAST", "FAR_FUTURE", "NEVER", "isnever"]
 
 @typing.overload
 def isnever(t: float) -> bool:
@@ -22,16 +23,12 @@ def isnever(t: float) -> bool:
     """
 
 @typing.overload
-def isnever(t: npt.NDArray[np.float64] | list[float]) -> npt.NDArray[np.bool8]:
+def isnever(t: npt.NDArray[numpy.float64]) -> npt.NDArray:
     """
     Returns an array of booleans corresponding to whether the elements of an array correspond to "never". As "never" is
     implemented as a floating-point NaN, direct comparison will always fail, since NaN != NaN.
     """
 
-DISTANT_PAST: float # value = -inf
-"""A value that compares less than any other value but itself and NEVER"""
-
-FAR_FUTURE: float # value = inf
-"""A value that compares greater than any other value but itself and NEVER"""
-NEVER: float # value = nan
-"""A value that compares unequal to any value, including itself"""
+DISTANT_PAST: float  # value = -inf
+FAR_FUTURE: float  # value = inf
+NEVER: float  # value = nan
diff --git a/pyproject.toml b/pyproject.toml
index 73dbce9c..c20ee5e5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,6 +22,7 @@ requires-python = ">=3.10"
 classifiers = [
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
     "License :: OSI Approved :: MIT License",
     "Operating System :: OS Independent",
 ]
@@ -60,8 +61,8 @@ dev = [
 
 [project.urls]
-"Homepage" = "https://github.com/pypa/sampleproject"
-"Bug Tracker" = "https://github.com/pypa/sampleproject/issues"
+"Homepage" = "https://neworder.readthedocs.io/"
+"Bug Tracker" = "https://github.com/virgesmith/neworder/issues"
 
 [tool.pytest.ini_options]
 minversion = "6.0"
 testpaths = [
   "test"
 ]
 
-[tool.ruff]
-select = ["E", "F"]
+[tool.ruff.lint]
+select = ["E", "F", "I"]
 ignore = ["E501"]
 
-[tool.ruff.per-file-ignores]
+[tool.ruff.lint.per-file-ignores]
 "**/__init__.py" = ["F401", "F403"]
\ No newline at end of file
diff --git a/src/Model.cpp b/src/Model.cpp
index 3ddeab46..49a2b26f 100644
--- a/src/Model.cpp
+++ b/src/Model.cpp
@@ -10,7 +10,7 @@ #include
 
 no::Model::Model(no::Timeline& timeline, const py::function& seeder)
-  : m_timeline(timeline), m_timeline_handle(py::cast(&timeline)),
+  : m_runState(no::Model::NOT_STARTED), m_timeline(timeline), m_timeline_handle(py::cast(&timeline)),
   m_monteCarlo(seeder().cast())
 {
   no::log("model init: timeline=%% mc=%%"s % m_timeline.repr() % m_monteCarlo.repr());
@@ -26,7 +26,7 @@ void no::Model::modify()
 void no::Model::halt()
 {
   no::log("sending halt signal to Model::run()");
-  no::env::halt = true;
+  m_runState = Model::HALTED;
 }
 
 bool no::Model::check()
@@ -56,10 +56,18 @@ bool no::Model::run(Model& model)
   // (we can use the methods for C++ implementations, but not for python implementations)
   auto pytimeline = PyAccessor(model.timeline());
 
+  // check the model hasn't already completed
+  if (pytimeline.get_as<bool>("at_end")) {
+    throw py::stop_iteration("Model has already run to completion. Reinstantiate the timeline if necessary");
+  }
+
+  model.m_runState = no::Model::RUNNING;
+
   // get the Model class name
   const std::string& model_name = py::cast(&model).attr("__class__").attr("__name__").cast<std::string>();
 
-  no::log("starting %% model run. start time=%%"s % model_name % pytimeline.get("start"));
+
+  no::log("starting %% model run. start time=%%"s % model_name % pytimeline.get("time"));
 
   // apply the modifier, if implemented in the derived class
   no::log("t=%%(%%) %%.modify(%%)"s % pytimeline.get("time") % pytimeline.get("index") % model_name % rank);
@@ -67,7 +75,7 @@
   // Loop over timeline
   bool ok = true;
-  while (!pytimeline.get_as<bool>("at_end"))
+  while (model.m_runState == Model::RUNNING)
   {
     py::object t = pytimeline.get("time");
     int64_t timeindex = pytimeline.get_as<int64_t>("index");
@@ -92,19 +100,18 @@
     }
 
     // check python hasn't signalled early termination
-    if (no::env::halt)
+    if (model.m_runState == no::Model::HALTED)
     {
       no::log("t=%%(%%) received halt signal"s % t % timeindex);
-      // reset the flag so that subsequent model runs don't halt immediately
-      no::env::halt = false;
-      break;
     }
-  }
-  // call the finalise method (if not explicitly halted mid-timeline)
-  if (pytimeline.get_as<bool>("at_end"))
-  {
-    no::log("t=%%(%%) %%.finalise()"s % pytimeline.get("time") % pytimeline.get("index") % model_name );
-    model.finalise();
+
+    // normal completion if not explicitly halted
+    if (model.m_runState == no::Model::RUNNING && pytimeline.get_as<bool>("at_end"))
+    {
+      model.m_runState = Model::COMPLETED;
+      no::log("t=%%(%%) %%.finalise()"s % pytimeline.get("time") % pytimeline.get("index") % model_name );
+      model.finalise();
+    }
   }
   no::log("%% exec time=%%s"s % (ok ? "SUCCESS": "ERRORED") % timer.elapsed_s());
   return ok;
diff --git a/src/Model.h b/src/Model.h
index 6642a744..ef03cf37 100644
--- a/src/Model.h
+++ b/src/Model.h
@@ -5,57 +5,70 @@
 #include "MonteCarlo.h"
 #include "Module.h"
 
-namespace no {
+namespace no
+{
 
-class Environment;
+  class Environment;
 
-class NEWORDER_EXPORT Model
-{
-public:
-  Model(no::Timeline& timeline, const py::function& seeder);
+  class NEWORDER_EXPORT Model
+  {
+  public:
+    // deliberately use pre-C++11 enum - scope will be e.g. Model::RUNNING
+    enum RunState
+    {
+      NOT_STARTED,
+      RUNNING,
+      HALTED,   // immediate exit without calling finalise
+      COMPLETED // reached end of timeline, finalise called
+    };
 
-  virtual ~Model() = default;
+    Model(no::Timeline &timeline, const py::function &seeder);
 
-  Model(const Model&) = delete;
-  Model& operator=(const Model&) = delete;
-  Model(Model&&) = delete;
-  Model& operator=(Model&&) = delete;
+    virtual ~Model() = default;
 
-  static bool run(Model& model);
+    Model(const Model &) = delete;
+    Model &operator=(const Model &) = delete;
+    Model(Model &&) = delete;
+    Model &operator=(Model &&) = delete;
 
-  // getters
-  Timeline& timeline() { return m_timeline; }
-  MonteCarlo& mc() { return m_monteCarlo; }
+    static bool run(Model &model);
 
-  // functions to override
-  virtual void modify(); // optional, parallel runs only
-  virtual void step() = 0; // compulsory
-  virtual bool check(); // optional
-  virtual void finalise(); // optional
+    // getters
+    Timeline &timeline() { return m_timeline; }
+    MonteCarlo &mc() { return m_monteCarlo; }
 
-  // set the halt flag
-  void halt();
+    // functions to override
+    virtual void modify();   // optional, parallel runs only
+    virtual void step() = 0; // compulsory
+    virtual bool check();    // optional
+    virtual void finalise(); // optional
 
-private:
-  Timeline& m_timeline;
-  py::object m_timeline_handle; // ensures above ref isnt deleted during the lifetime of this object
-  MonteCarlo m_monteCarlo;
-};
+    // set the halt flag
+    void halt();
 
+    // get the run state
+    RunState runState() const { return m_runState; }
 
-class PyModel: private Model
-{
-  using Model::Model;
-  using Model::operator=;
+  private:
+    RunState m_runState;
+    Timeline &m_timeline;
+    py::object m_timeline_handle; // ensures above ref isn't deleted during the lifetime of this object
+    MonteCarlo m_monteCarlo;
+  };
+
+  class PyModel : private Model
+  {
+    using Model::Model;
+    using Model::operator=;
 
-  // trampoline methods
-  void modify() override { PYBIND11_OVERRIDE(void, Model, modify); }
+    // trampoline methods
+    void modify() override { PYBIND11_OVERRIDE(void, Model, modify); }
 
-  void step() override { PYBIND11_OVERRIDE_PURE(void, Model, step); }
+    void step() override { PYBIND11_OVERRIDE_PURE(void, Model, step); }
 
-  bool check() override { PYBIND11_OVERRIDE(bool, Model, check); }
+    bool check() override { PYBIND11_OVERRIDE(bool, Model, check); }
 
-  void finalise() override { PYBIND11_OVERRIDE(void, Model, finalise); }
-};
+    void finalise() override { PYBIND11_OVERRIDE(void, Model, finalise); }
+  };
 }
\ No newline at end of file
diff --git a/src/Module.cpp b/src/Module.cpp
index 84ecfaa3..e5bb538e 100644
--- a/src/Module.cpp
+++ b/src/Module.cpp
@@ -28,7 +28,6 @@
 std::atomic_int no::env::rank = -1;
 std::atomic_int no::env::size = -1;
 std::atomic_bool no::env::verbose = false;
 std::atomic_bool no::env::checked = true;
-std::atomic_bool no::env::halt = false;
 std::atomic_int64_t no::env::uniqueIndex = -1;
 // these types are not trivially copyable so can't be atomic
 std::string no::env::logPrefix[2];
@@ -81,9 +80,6 @@ PYBIND11_MODULE(_neworder_core, m)
   auto time = m.def_submodule("time", time_docstr)
     .def("isnever", no::time::isnever, time_isnever_docstr, "t"_a) // scalar
     .def("isnever", no::time::isnever_a, time_isnever_a_docstr, "t"_a); // array
-  // .def("distant_past", no::time::distant_past, time_distant_past_docstr)
-  // .def("far_future", no::time::far_future, time_far_future_docstr)
-  // .def("never", no::time::never, time_never_docstr)
   time.attr("DISTANT_PAST") = no::time::distant_past();
time.attr("FAR_FUTURE") = no::time::far_future(); time.attr("NEVER") = no::time::never(); @@ -173,12 +169,13 @@ PYBIND11_MODULE(_neworder_core, m) .def("__repr__", &no::MonteCarlo::repr, mc_repr_docstr); // Microsimulation (or ABM) model class - py::class_(m, "Model", model_docstr) + auto model = py::class_(m, "Model", model_docstr) .def(py::init(), model_init_docstr,"timeline"_a, "seeder"_a = py::cpp_function(no::MonteCarlo::deterministic_independent_stream)) // properties are readonly only in the sense you can't assign to them; you CAN call their mutable methods .def_property_readonly("timeline", &no::Model::timeline, model_timeline_docstr) .def_property_readonly("mc", &no::Model::mc, model_mc_docstr) + .def_property_readonly("run_state", &no::Model::runState, model_runstate_docstr) .def("modify", &no::Model::modify, model_modify_docstr) .def("step", &no::Model::step, model_step_docstr) .def("check", &no::Model::check, model_check_docstr) @@ -186,6 +183,13 @@ PYBIND11_MODULE(_neworder_core, m) .def("halt", &no::Model::halt, model_halt_docstr); // NB the all-important run function is not exposed to python, it can only be executed via the `neworder.run` function + py::enum_(model, "RunState") + .value("NOT_STARTED", no::Model::NOT_STARTED) + .value("RUNNING", no::Model::RUNNING) + .value("HALTED", no::Model::HALTED) + .value("COMPLETED", no::Model::COMPLETED) + .export_values(); + // statistical utils m.def_submodule("stats", stats_docstr) .def("logistic", no::logistic, diff --git a/src/Module.h b/src/Module.h index 44dc2716..5104698b 100644 --- a/src/Module.h +++ b/src/Module.h @@ -12,7 +12,6 @@ namespace env { extern std::atomic_bool verbose; extern std::atomic_bool checked; -extern std::atomic_bool halt; extern std::atomic_int rank; extern std::atomic_int size; extern std::atomic_int64_t uniqueIndex; diff --git a/src/Module_docstr.cpp b/src/Module_docstr.cpp index 405323b0..22b324c3 100644 --- a/src/Module_docstr.cpp +++ b/src/Module_docstr.cpp @@ -225,6 +225,13 @@ const char* model_timeline_docstr = R"docstr( const char* model_mc_docstr = R"docstr( The model's Monte-Carlo engine )docstr"; +const char* model_runstate_docstr = R"docstr( + The model's current state - one of: + NOT_STARTED: model has not been run + RUNNING: model is in progress + HALTED: model has been explicitly halted by calling its halt() method + COMPLETED: model has run to the end of its timeline +)docstr"; const char* model_modify_docstr = R"docstr( User-overridable method used to modify state in a per-process basis for multiprocess model runs. Default behaviour is to do nothing. diff --git a/test/benchmark.py b/test/benchmark.py index 1d129ebe..f8e9db3e 100644 --- a/test/benchmark.py +++ b/test/benchmark.py @@ -1,125 +1,143 @@ import time + import numpy as np import pandas as pd # type: ignore -import neworder as no +import neworder as no no.verbose() # define some global variables describing where the starting population and the parameters of the dynamics come from INITIAL_POPULATION = "./ssm_hh_E09000001_OA11_2011.csv" -t = np.array([ - [0.9, 0.05, 0.05, 0., 0., 0. ], - [0.05, 0.9, 0.04, 0.01, 0., 0. ], - [0., 0.05, 0.9, 0.05, 0., 0. ], - [0., 0., 0.05, 0.9, 0.05, 0. 
], - [0.1, 0.1, 0.1, 0.1, 0.5, 0.1 ], - [0., 0., 0., 0., 0.2, 0.8 ]]) +t = np.array( + [ + [0.9, 0.05, 0.05, 0.0, 0.0, 0.0], + [0.05, 0.9, 0.04, 0.01, 0.0, 0.0], + [0.0, 0.05, 0.9, 0.05, 0.0, 0.0], + [0.0, 0.0, 0.05, 0.9, 0.05, 0.0], + [0.1, 0.1, 0.1, 0.1, 0.5, 0.1], + [0.0, 0.0, 0.0, 0.0, 0.2, 0.8], + ] +) c = np.array([-1, 1, 2, 3, 4, 5]) + def get_data() -> pd.DataFrame: - hh = pd.read_csv(INITIAL_POPULATION)#, nrows=100) - hh = pd.concat([hh] * 8, ignore_index=True) - return hh + hh = pd.read_csv(INITIAL_POPULATION) # , nrows=100) + hh = pd.concat([hh] * 8, ignore_index=True) + return hh def interp(cumprob: np.ndarray[np.float64, np.dtype[np.float64]], x: float) -> int: - lbound = 0 - while lbound < len(cumprob) - 1: - if cumprob[lbound] > x: - break - lbound += 1 - return lbound + lbound = 0 + while lbound < len(cumprob) - 1: + if cumprob[lbound] > x: + break + lbound += 1 + return lbound -def sample(u: float, tc: np.ndarray[np.float64, np.dtype[np.float64]], c: np.ndarray[np.float64, np.dtype[np.float64]]) -> float: - return c[interp(tc, u)] -def transition(c: np.ndarray[np.float64, np.dtype[np.float64]], t: np.ndarray[np.float64, np.dtype[np.float64]], df: pd.DataFrame, colname: str) -> None: - #u = m.mc.ustream(len(df)) - tc = np.cumsum(t, axis=1) +def sample( + u: float, + tc: np.ndarray[np.float64, np.dtype[np.float64]], + c: np.ndarray[np.float64, np.dtype[np.float64]], +) -> float: + return c[interp(tc, u)] - # reverse mapping of category label to index - lookup = { c[i]: i for i in range(len(c)) } - # for i in range(len(df)): - # current = df.loc[i, colname] - # df.loc[i, colname] = sample(u[i], tc[lookup[current]], c) +def transition( + c: np.ndarray[np.float64, np.dtype[np.float64]], + t: np.ndarray[np.float64, np.dtype[np.float64]], + df: pd.DataFrame, + colname: str, +) -> None: + # u = m.mc.ustream(len(df)) + tc = np.cumsum(t, axis=1) + + # reverse mapping of category label to index + lookup = {c[i]: i for i in range(len(c))} + + # for i in range(len(df)): + # current = df.loc[i, colname] + # df.loc[i, colname] = sample(u[i], tc[lookup[current]], c) + + df[colname] = df[colname].apply( + lambda current: sample(m.mc.ustream(1)[0], tc[lookup[current]], c) + ) - df[colname] = df[colname].apply(lambda current: sample(m.mc.ustream(1)[0], tc[lookup[current]], c)) def python_impl(m: no.Model, df: pd.DataFrame) -> tuple[int, float, pd.Series]: + start = time.time() + transition(c, t, df, "LC4408_C_AHTHUK11") + return len(df), time.time() - start, df.LC4408_C_AHTHUK11 - start = time.time() - transition(c, t, df, "LC4408_C_AHTHUK11") - return len(df), time.time() - start, df.LC4408_C_AHTHUK11 def cpp_impl(m: no.Model, df: pd.DataFrame) -> tuple[int, float, pd.Series]: - - start = time.time() - no.df.transition(m, c, t, df, "LC4408_C_AHTHUK11") - return len(df), time.time() - start, df.LC4408_C_AHTHUK11 + start = time.time() + no.df.transition(m, c, t, df, "LC4408_C_AHTHUK11") + return len(df), time.time() - start, df.LC4408_C_AHTHUK11 -#def f(m): +# def f(m): - # n = 1000 +# n = 1000 - # c = [1,2,3] - # df = pd.DataFrame({"n": [1]*n}) +# c = [1,2,3] +# df = pd.DataFrame({"n": [1]*n}) - # # no transitions - # t = np.identity(3) +# # no transitions +# t = np.identity(3) - # no.df.transition(m, c, t, df, "n") - # no.log(df.n.value_counts()[1] == 1000) +# no.df.transition(m, c, t, df, "n") +# no.log(df.n.value_counts()[1] == 1000) - # # all 1 -> 2 - # t[0,0] = 0.0 - # t[1,0] = 1.0 - # no.df.transition(m, c, t, df, "n") - # no.log(df.n.value_counts()[2] == 1000) +# # all 1 -> 2 +# 
t[0,0] = 0.0 +# t[1,0] = 1.0 +# no.df.transition(m, c, t, df, "n") +# no.log(df.n.value_counts()[2] == 1000) - # # all 2 -> 1 or 3 - # t = np.array([ - # [1.0, 0.5, 0.0], - # [0.0, 0.0, 0.0], - # [0.0, 0.5, 1.0], - # ]) +# # all 2 -> 1 or 3 +# t = np.array([ +# [1.0, 0.5, 0.0], +# [0.0, 0.0, 0.0], +# [0.0, 0.5, 1.0], +# ]) - # no.df.transition(m, c, t, df, "n") - # no.log(2 not in df.n.value_counts())#[2] == 1000) - # no.log(df.n.value_counts()) +# no.df.transition(m, c, t, df, "n") +# no.log(2 not in df.n.value_counts())#[2] == 1000) +# no.log(df.n.value_counts()) - # t = np.ones((3,3)) / 3 - # no.df.transition(m, c, t, df, "n") - # no.log(df.n.value_counts()) - # for i in c: - # no.log(df.n.value_counts()[i] > n/3 - sqrt(n) and df.n.value_counts()[i] < n/3 + sqrt(n)) +# t = np.ones((3,3)) / 3 +# no.df.transition(m, c, t, df, "n") +# no.log(df.n.value_counts()) +# for i in c: +# no.log(df.n.value_counts()[i] > n/3 - sqrt(n) and df.n.value_counts()[i] < n/3 + sqrt(n)) - # t = np.array([ - # [1.0, 1.0, 1.0], - # [0.0, 0.0, 0.0], - # [0.0, 0.0, 0.0], - # ]) - # no.df.transition(m, c, t, df, "n") - # no.log(df.n.value_counts()) +# t = np.array([ +# [1.0, 1.0, 1.0], +# [0.0, 0.0, 0.0], +# [0.0, 0.0, 0.0], +# ]) +# no.df.transition(m, c, t, df, "n") +# no.log(df.n.value_counts()) if __name__ == "__main__": - m = no.Model(no.NoTimeline(), no.MonteCarlo.deterministic_identical_stream) + m = no.Model(no.NoTimeline(), no.MonteCarlo.deterministic_identical_stream) - rows, tc, colcpp = cpp_impl(m, get_data()) - no.log("C++ %d: %f" % (rows, tc)) + rows, tc, colcpp = cpp_impl(m, get_data()) + no.log("C++ %d: %f" % (rows, tc)) - m.mc.reset() - rows, tp, colpy = python_impl(m, get_data()) - no.log("py %d: %f" % (rows, tp)) + m.mc.reset() + rows, tp, colpy = python_impl(m, get_data()) + no.log("py %d: %f" % (rows, tp)) - #no.log(colcpp-colpy) + # no.log(colcpp-colpy) - assert np.array_equal(colcpp, colpy) + assert np.array_equal(colcpp, colpy) - no.log("speedup factor = %f" % (tp / tc)) + no.log("speedup factor = %f" % (tp / tc)) -# f(m) \ No newline at end of file +# f(m) diff --git a/test/conftest.py b/test/conftest.py index 6768a77f..4eeabe02 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,12 +1,13 @@ - import pytest + import neworder as no + @pytest.fixture(scope="function") def base_model() -> no.Model: - return no.Model(no.NoTimeline(), no.MonteCarlo.deterministic_identical_stream) + return no.Model(no.NoTimeline(), no.MonteCarlo.deterministic_identical_stream) + @pytest.fixture(scope="function") def base_indep_model() -> no.Model: - return no.Model(no.NoTimeline()) - + return no.Model(no.NoTimeline()) diff --git a/test/test_df.py b/test/test_df.py index 3b2b34dc..12c2b78a 100644 --- a/test/test_df.py +++ b/test/test_df.py @@ -1,116 +1,133 @@ +from math import sqrt import numpy as np import pandas as pd -import neworder as no -from math import sqrt import pytest +import neworder as no + def test_errors() -> None: - df = pd.read_csv("./test/df.csv") + df = pd.read_csv("./test/df.csv") - # base model for MC engine - model = no.Model(no.NoTimeline(), no.MonteCarlo.deterministic_identical_stream) + # base model for MC engine + model = no.Model(no.NoTimeline(), no.MonteCarlo.deterministic_identical_stream) - cats = np.array(range(4)) - # identity matrix means no transitions - trans = np.identity(len(cats)) + cats = np.array(range(4)) + # identity matrix means no transitions + trans = np.identity(len(cats)) - # invalid transition matrices - with pytest.raises(ValueError): - 
no.df.transition(model, cats, np.ones((1, 2)), df, "DC2101EW_C_ETHPUK11") - with pytest.raises(ValueError): - no.df.transition(model, cats, np.ones((1, 1)), df, "DC2101EW_C_ETHPUK11") - with pytest.raises(ValueError): - no.df.transition(model, cats, trans + 0.1, df, "DC2101EW_C_ETHPUK11") + # invalid transition matrices + with pytest.raises(ValueError): + no.df.transition(model, cats, np.ones((1, 2)), df, "DC2101EW_C_ETHPUK11") + with pytest.raises(ValueError): + no.df.transition(model, cats, np.ones((1, 1)), df, "DC2101EW_C_ETHPUK11") + with pytest.raises(ValueError): + no.df.transition(model, cats, trans + 0.1, df, "DC2101EW_C_ETHPUK11") - # category data MUST be 64bit integer. This will almost certainly be the default on linux/OSX (LP64) but maybe not on windows (LLP64) - df["DC2101EW_C_ETHPUK11"]= df["DC2101EW_C_ETHPUK11"].astype(np.int32) + # category data MUST be 64bit integer. This will almost certainly be the default on linux/OSX (LP64) but maybe not on windows (LLP64) + df["DC2101EW_C_ETHPUK11"] = df["DC2101EW_C_ETHPUK11"].astype(np.int32) - with pytest.raises(TypeError): - no.df.transition(model, cats, trans, df, "DC2101EW_C_ETHPUK11") + with pytest.raises(TypeError): + no.df.transition(model, cats, trans, df, "DC2101EW_C_ETHPUK11") def test_basic() -> None: - - # test unique index generation - idx = no.df.unique_index(100) - assert np.array_equal(idx, np.arange(no.mpi.RANK, 100 * no.mpi.SIZE, step=no.mpi.SIZE)) - - idx = no.df.unique_index(100) - assert np.array_equal(idx, np.arange(100 * no.mpi.SIZE + no.mpi.RANK, 200 * no.mpi.SIZE, step=no.mpi.SIZE)) - - N = 100000 - # base model for MC engine - model = no.Model(no.NoTimeline(), no.MonteCarlo.deterministic_identical_stream) - - c = [1,2,3] - df = pd.DataFrame({"category": [1] * N}) - - # no transitions, check no changes - t = np.identity(3) - no.df.transition(model, c, t, df, "category") - assert df.category.value_counts()[1] == N - - # all 1 -> 2 - t[0,0] = 0.0 - t[0,1] = 1.0 - no.df.transition(model, c, t, df, "category") - assert 1 not in df.category.value_counts() - assert df.category.value_counts()[2] == N - - # 2 -> 1 or 3 - t = np.array([ - [1.0, 0.0, 0.0], - [0.5, 0.0, 0.5], - [0.0, 0.0, 1.0], - ]) - - no.df.transition(model, c, t, df, "category") - assert 2 not in df.category.value_counts() - for i in [1,3]: - assert df.category.value_counts()[i] > N/2 - sqrt(N) and df.category.value_counts()[i] < N/2 + sqrt(N) - - # spread evenly - t = np.ones((3,3)) / 3 - no.df.transition(model, c, t, df, "category") - for i in c: - assert df.category.value_counts()[i] > N/3 - sqrt(N) and df.category.value_counts()[i] < N/3 + sqrt(N) - - # all -> 1 - t = np.array([ - [1.0, 0.0, 0.0], - [1.0, 0.0, 0.0], - [1.0, 0.0, 0.0], - ]) - no.df.transition(model, c, t, df, "category") - assert df.category.value_counts()[1] == N + # test unique index generation + idx = no.df.unique_index(100) + assert np.array_equal( + idx, np.arange(no.mpi.RANK, 100 * no.mpi.SIZE, step=no.mpi.SIZE) + ) + + idx = no.df.unique_index(100) + assert np.array_equal( + idx, + np.arange(100 * no.mpi.SIZE + no.mpi.RANK, 200 * no.mpi.SIZE, step=no.mpi.SIZE), + ) + + N = 100000 + # base model for MC engine + model = no.Model(no.NoTimeline(), no.MonteCarlo.deterministic_identical_stream) + + c = [1, 2, 3] + df = pd.DataFrame({"category": [1] * N}) + + # no transitions, check no changes + t = np.identity(3) + no.df.transition(model, c, t, df, "category") + assert df.category.value_counts()[1] == N + + # all 1 -> 2 + t[0, 0] = 0.0 + t[0, 1] = 1.0 + no.df.transition(model, c, 
t, df, "category") + assert 1 not in df.category.value_counts() + assert df.category.value_counts()[2] == N + + # 2 -> 1 or 3 + t = np.array( + [ + [1.0, 0.0, 0.0], + [0.5, 0.0, 0.5], + [0.0, 0.0, 1.0], + ] + ) + + no.df.transition(model, c, t, df, "category") + assert 2 not in df.category.value_counts() + for i in [1, 3]: + assert df.category.value_counts()[i] > N / 2 - sqrt( + N + ) and df.category.value_counts()[i] < N / 2 + sqrt(N) + + # spread evenly + t = np.ones((3, 3)) / 3 + no.df.transition(model, c, t, df, "category") + for i in c: + assert df.category.value_counts()[i] > N / 3 - sqrt( + N + ) and df.category.value_counts()[i] < N / 3 + sqrt(N) + + # all -> 1 + t = np.array( + [ + [1.0, 0.0, 0.0], + [1.0, 0.0, 0.0], + [1.0, 0.0, 0.0], + ] + ) + no.df.transition(model, c, t, df, "category") + assert df.category.value_counts()[1] == N def test(base_model: no.Model) -> None: - df = pd.read_csv("./test/df.csv") - - cats = np.array(range(4)) - # identity matrix means no transitions - trans = np.identity(len(cats)) - - no.df.transition(base_model, cats, trans, df, "DC2101EW_C_ETHPUK11") - - assert len(df["DC2101EW_C_ETHPUK11"].unique()) == 1 and df["DC2101EW_C_ETHPUK11"].unique()[0] == 2 - - # NOTE transition matrix interpreted as being COLUMN MAJOR due to pandas DataFrame storing data in column-major order - - # force 2->3 - trans[2, 2] = 0.0 - trans[2, 3] = 1.0 - no.df.transition(base_model, cats, trans, df, "DC2101EW_C_ETHPUK11") - no.log(df["DC2101EW_C_ETHPUK11"].unique()) - assert len(df["DC2101EW_C_ETHPUK11"].unique()) == 1 and df["DC2101EW_C_ETHPUK11"].unique()[0] == 3 - - - # ~half of 3->0 - trans[3, 0] = 0.5 - trans[3, 3] = 0.5 - no.df.transition(base_model, cats, trans, df, "DC2101EW_C_ETHPUK11") - assert np.array_equal(np.sort(df["DC2101EW_C_ETHPUK11"].unique()), np.array([0, 3])) - + df = pd.read_csv("./test/df.csv") + + cats = np.array(range(4)) + # identity matrix means no transitions + trans = np.identity(len(cats)) + + no.df.transition(base_model, cats, trans, df, "DC2101EW_C_ETHPUK11") + + assert ( + len(df["DC2101EW_C_ETHPUK11"].unique()) == 1 + and df["DC2101EW_C_ETHPUK11"].unique()[0] == 2 + ) + + # NOTE transition matrix interpreted as being COLUMN MAJOR due to pandas DataFrame storing data in column-major order + + # force 2->3 + trans[2, 2] = 0.0 + trans[2, 3] = 1.0 + no.df.transition(base_model, cats, trans, df, "DC2101EW_C_ETHPUK11") + no.log(df["DC2101EW_C_ETHPUK11"].unique()) + assert ( + len(df["DC2101EW_C_ETHPUK11"].unique()) == 1 + and df["DC2101EW_C_ETHPUK11"].unique()[0] == 3 + ) + + # ~half of 3->0 + trans[3, 0] = 0.5 + trans[3, 3] = 0.5 + no.df.transition(base_model, cats, trans, df, "DC2101EW_C_ETHPUK11") + assert np.array_equal(np.sort(df["DC2101EW_C_ETHPUK11"].unique()), np.array([0, 3])) diff --git a/test/test_domain.py b/test/test_domain.py index 72bd3a80..102bdb9c 100644 --- a/test/test_domain.py +++ b/test/test_domain.py @@ -1,135 +1,141 @@ - import numpy as np -import neworder as no import pandas as pd # type: ignore import pytest +import neworder as no + def test_invalid() -> None: - with pytest.raises(AssertionError): - no.Space(np.array([]), np.array([])) - with pytest.raises(AssertionError): - no.Space(np.array([0.0]), np.array([0.0])) - with pytest.raises(AssertionError): - no.Space(np.array([0.0, 1.0]), np.array([1.0, -1.0])) + with pytest.raises(AssertionError): + no.Space(np.array([]), np.array([])) + with pytest.raises(AssertionError): + no.Space(np.array([0.0]), np.array([0.0])) + with pytest.raises(AssertionError): + 
no.Space(np.array([0.0, 1.0]), np.array([1.0, -1.0])) def test_space2d() -> None: + # constrained edges + space2dc = no.Space(np.array([-1.0, -3.0]), np.array([2.0, 5.0]), no.Edge.CONSTRAIN) - # constrained edges - space2dc = no.Space(np.array([-1.0, -3.0]), np.array([2.0, 5.0]), no.Edge.CONSTRAIN) - - point = np.zeros(2) - delta = np.array([0.6, 0.7]) + point = np.zeros(2) + delta = np.array([0.6, 0.7]) - # move point until stuck in corner - for _ in range(100): - point, delta = space2dc.move(point, delta, 1.0) + # move point until stuck in corner + for _ in range(100): + point, delta = space2dc.move(point, delta, 1.0) - # check its in corner and not moving - assert point[0] == 2.0 - assert point[1] == 5.0 - assert delta[0] == 0.0 - assert delta[1] == 0.0 + # check its in corner and not moving + assert point[0] == 2.0 + assert point[1] == 5.0 + assert delta[0] == 0.0 + assert delta[1] == 0.0 - # wrapped edges - space2dw = no.Space(np.array([-1.0, -3.0]), np.array([2.0, 5.0]), no.Edge.WRAP) + # wrapped edges + space2dw = no.Space(np.array([-1.0, -3.0]), np.array([2.0, 5.0]), no.Edge.WRAP) - assert space2dw.dim == 2 + assert space2dw.dim == 2 - points = np.array([[0.,0.],[1.,0.],[0.,1.]]) - delta = np.array([0.6, 0.7]) + points = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]]) + delta = np.array([0.6, 0.7]) - # move point - for _ in range(100): - points, delta = space2dw.move(points, delta, 1.0) - # check distances dont change - d2, _ = space2dw.dists2(points) - assert np.all(d2.diagonal() == 0.0) - assert np.allclose(d2[0], np.array([0., 1., 1.])) - assert np.allclose(d2[1], np.array([1., 0., 2.])) + # move point + for _ in range(100): + points, delta = space2dw.move(points, delta, 1.0) + # check distances dont change + d2, _ = space2dw.dists2(points) + assert np.all(d2.diagonal() == 0.0) + assert np.allclose(d2[0], np.array([0.0, 1.0, 1.0])) + assert np.allclose(d2[1], np.array([1.0, 0.0, 2.0])) - # check its still in domain and speed unchanged - assert np.all(points[:,0] >= -1.0) and np.all(points[:, 0] < 2.0) - assert np.all(points[:,1] >= -3.0) and np.all(points[:, 1] < 5.0) - assert delta[0] == 0.6 - assert delta[1] == 0.7 + # check its still in domain and speed unchanged + assert np.all(points[:, 0] >= -1.0) and np.all(points[:, 0] < 2.0) + assert np.all(points[:, 1] >= -3.0) and np.all(points[:, 1] < 5.0) + assert delta[0] == 0.6 + assert delta[1] == 0.7 - # bounce edges - space2db = no.Space(np.array([-1.0, -3.0]), np.array([2.0, 5.0]), no.Edge.BOUNCE) + # bounce edges + space2db = no.Space(np.array([-1.0, -3.0]), np.array([2.0, 5.0]), no.Edge.BOUNCE) - assert space2db.dim == 2 + assert space2db.dim == 2 - points = np.array([[0.,0.],[1.,0.],[0.,1.]]) - deltas = np.array([[0.6, 0.7],[0.6, 0.7],[0.6, 0.7]]) + points = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]]) + deltas = np.array([[0.6, 0.7], [0.6, 0.7], [0.6, 0.7]]) - # move points - for _ in range(100): - points, deltas = space2dw.move(points, deltas, 1.0) + # move points + for _ in range(100): + points, deltas = space2dw.move(points, deltas, 1.0) - # check points still in domain and absolute speed unchanged - assert np.all(points[:,0] >= -1.0) and np.all(points[:, 0] < 2.0) - assert np.all(points[:,1] >= -3.0) and np.all(points[:, 1] < 5.0) - assert np.all(np.abs(deltas[:,0]) == 0.6) - assert np.all(np.abs(deltas[:,1]) == 0.7) + # check points still in domain and absolute speed unchanged + assert np.all(points[:, 0] >= -1.0) and np.all(points[:, 0] < 2.0) + assert np.all(points[:, 1] >= -3.0) and np.all(points[:, 1] < 5.0) + 
assert np.all(np.abs(deltas[:, 0]) == 0.6) + assert np.all(np.abs(deltas[:, 1]) == 0.7) def test_space3d() -> None: - rng = np.random.default_rng(19937) + rng = np.random.default_rng(19937) - N = 5 - bodies = pd.DataFrame(index=no.df.unique_index(N), data={ - "x": rng.random(N) - 0.5, - "y": rng.random(N) - 0.5, - "z": rng.random(N) - 0.5, - "vx": 0.01, - "vy": 0.01, - "vz": 0.01 - }) + N = 5 + bodies = pd.DataFrame( + index=no.df.unique_index(N), + data={ + "x": rng.random(N) - 0.5, + "y": rng.random(N) - 0.5, + "z": rng.random(N) - 0.5, + "vx": 0.01, + "vy": 0.01, + "vz": 0.01, + }, + ) - space = no.Space.unbounded(3) + space = no.Space.unbounded(3) - s = np.column_stack((bodies.x, bodies.y, bodies.z)) - assert np.all(space.dists(s).diagonal() == 0.0) + s = np.column_stack((bodies.x, bodies.y, bodies.z)) + assert np.all(space.dists(s).diagonal() == 0.0) - assert space.dim == 3 + assert space.dim == 3 - dt = 1.0 - (bodies.x, bodies.y, bodies.z), (bodies.vx, bodies.vy, bodies.vz) = space.move((bodies.x, bodies.y, bodies.z), (bodies.vx, bodies.vy, bodies.vz), dt, ungroup=True) + dt = 1.0 + (bodies.x, bodies.y, bodies.z), (bodies.vx, bodies.vy, bodies.vz) = space.move( + (bodies.x, bodies.y, bodies.z), + (bodies.vx, bodies.vy, bodies.vz), + dt, + ungroup=True, + ) def test_grid() -> None: - - with pytest.raises(ValueError): - no.StateGrid(np.empty(shape=(3,3)), no.Edge.UNBOUNDED) - with pytest.raises(ValueError): - no.StateGrid(np.empty(shape=())) - with pytest.raises(ValueError): - no.StateGrid(np.empty(shape=(2, 0))) - - state = np.zeros((5,5)) - state[0, 0] = 1 - state[1, 1] = 2 - state[1, -1] = 3 - - # total neighbours should be 3 in corner, 5 on edge, 8 in middle - g = no.StateGrid(state, no.Edge.CONSTRAIN) - assert np.sum(g.count_neighbours()) == 3 - assert np.sum(g.count_neighbours(lambda x: x == 2)) == 8 - assert np.sum(g.count_neighbours(lambda x: x == 3)) == 5 - assert np.sum(g.count_neighbours(lambda x: x != 0)) == 16 - assert g.shift((0, 0), (-1, -1)) == (0, 0) - - state = np.zeros((4,4,4)) - state[0,0,0] = 1 - state[-1,1,-1] = -1 - - # total neighbours should be 26 - g = no.StateGrid(state, no.Edge.WRAP) - assert np.sum(g.count_neighbours()) == 26 - assert np.sum(g.count_neighbours(lambda x: x == -1)) == 26 - assert np.sum(g.count_neighbours(lambda x: x != 0)) == 52 - assert g.shift((0, 0, 0), (-1, -1, -1)) == (3, 3, 3) - - g = no.StateGrid(state, no.Edge.BOUNCE) - assert g.shift((0, 0, 0), (-1, -1, -1)) == (1, 1, 1) + with pytest.raises(ValueError): + no.StateGrid(np.empty(shape=(3, 3)), no.Edge.UNBOUNDED) + with pytest.raises(ValueError): + no.StateGrid(np.empty(shape=())) + with pytest.raises(ValueError): + no.StateGrid(np.empty(shape=(2, 0))) + + state = np.zeros((5, 5)) + state[0, 0] = 1 + state[1, 1] = 2 + state[1, -1] = 3 + + # total neighbours should be 3 in corner, 5 on edge, 8 in middle + g = no.StateGrid(state, no.Edge.CONSTRAIN) + assert np.sum(g.count_neighbours()) == 3 + assert np.sum(g.count_neighbours(lambda x: x == 2)) == 8 + assert np.sum(g.count_neighbours(lambda x: x == 3)) == 5 + assert np.sum(g.count_neighbours(lambda x: x != 0)) == 16 + assert g.shift((0, 0), (-1, -1)) == (0, 0) + + state = np.zeros((4, 4, 4)) + state[0, 0, 0] = 1 + state[-1, 1, -1] = -1 + + # total neighbours should be 26 + g = no.StateGrid(state, no.Edge.WRAP) + assert np.sum(g.count_neighbours()) == 26 + assert np.sum(g.count_neighbours(lambda x: x == -1)) == 26 + assert np.sum(g.count_neighbours(lambda x: x != 0)) == 52 + assert g.shift((0, 0, 0), (-1, -1, -1)) == (3, 3, 3) + + g = 
no.StateGrid(state, no.Edge.BOUNCE)
+    assert g.shift((0, 0, 0), (-1, -1, -1)) == (1, 1, 1)
diff --git a/test/test_geospatial.py b/test/test_geospatial.py
index 7e8a7785..0a1e5f78 100644
--- a/test/test_geospatial.py
+++ b/test/test_geospatial.py
@@ -1,11 +1,12 @@
 import pytest
-import matplotlib.pyplot as plt
 
-def test_geospatial() -> None:
 
+def test_geospatial() -> None:
     geospatial = pytest.importorskip("neworder.geospatial")
     # TODO...
-    domain = geospatial.GeospatialGraph.from_point((54.3748, -2.9988), dist=2000, network_type="drive", crs='epsg:27700')
+    domain = geospatial.GeospatialGraph.from_point(
+        (54.3748, -2.9988), dist=2000, network_type="drive", crs="epsg:27700"
+    )
 
-    assert domain.crs == 'epsg:27700'
+    assert domain.crs == "epsg:27700"
     assert len(domain.graph) == len(domain.all_nodes)
diff --git a/test/test_mc.py b/test/test_mc.py
index 7d48ade3..457f5d1d 100644
--- a/test/test_mc.py
+++ b/test/test_mc.py
@@ -1,333 +1,367 @@
+from math import factorial
+
 import numpy as np
-import numpy.typing as npt
-import neworder as no
 import pytest
 
+import neworder as no
+
 
 def test_mc_property(base_model: no.Model) -> None:
-  base_model.mc.ustream(1)
-  base_model.mc.reset()
+    base_model.mc.ustream(1)
+    base_model.mc.reset()
 
 
 def test_mc(base_model: no.Model) -> None:
-  x = base_model.mc.ustream(1)
-  base_model.mc.reset()
-  assert x == base_model.mc.ustream(1)
+    x = base_model.mc.ustream(1)
+    base_model.mc.reset()
+    assert x == base_model.mc.ustream(1)
 
 
 def test_seeders() -> None:
-  # serial tests
-  # determinisitc seeders always return the same value
-  assert no.MonteCarlo.deterministic_identical_stream() == no.MonteCarlo.deterministic_identical_stream()
-  assert no.MonteCarlo.deterministic_independent_stream() == no.MonteCarlo.deterministic_independent_stream()
-  # nondeterministic seeders don't
-  assert no.MonteCarlo.nondeterministic_stream() != no.MonteCarlo.nondeterministic_stream()
-
-  if not no.mpi.COMM:
-    return
-
-  # parallel tests
-  # all seeds equal
-  seeds = no.mpi.COMM.gather(no.MonteCarlo.deterministic_identical_stream(), 0)
-  if no.mpi.RANK == 0:
-    assert seeds
-    assert len(seeds) == no.mpi.SIZE
-    assert len(set(seeds)) == 1
-
-  # all seeds different but reproducible
-  seeds = no.mpi.COMM.gather(no.MonteCarlo.deterministic_independent_stream(), 0)
-  if no.mpi.RANK == 0:
-    assert seeds
-    assert len(seeds) == no.mpi.SIZE
-    assert len(set(seeds)) == len(seeds)
-  seeds2 = no.mpi.COMM.gather(no.MonteCarlo.deterministic_independent_stream(), 0)
-  if no.mpi.RANK == 0:
-    assert seeds == seeds2
-
-  # all seeds different and not reproducible
-  seeds = no.mpi.COMM.gather(no.MonteCarlo.nondeterministic_stream(), 0)
-  if no.mpi.RANK == 0:
-    assert seeds
-    assert len(seeds) == no.mpi.SIZE
-    assert len(set(seeds)) == len(seeds)
-  # TODO need higher time resolution on seeder
-  seeds2 = no.mpi.COMM.gather(no.MonteCarlo.nondeterministic_stream(), 0)
-  if no.mpi.RANK == 0:
-    assert seeds != seeds2
-
-  # test custom seeder
-  seeder = lambda: no.mpi.RANK + 1
-  m = no.Model(no.NoTimeline(), seeder)
-  assert m.mc.seed() == no.mpi.RANK + 1
+    # serial tests
+    # deterministic seeders always return the same value
+    assert (
+        no.MonteCarlo.deterministic_identical_stream()
+        == no.MonteCarlo.deterministic_identical_stream()
+    )
+    assert (
+        no.MonteCarlo.deterministic_independent_stream()
+        == no.MonteCarlo.deterministic_independent_stream()
+    )
+    # nondeterministic seeders don't
+    assert (
+        no.MonteCarlo.nondeterministic_stream()
+        != no.MonteCarlo.nondeterministic_stream()
+    )
+
+    if not no.mpi.COMM:
+ return + + # parallel tests + # all seeds equal + seeds = no.mpi.COMM.gather(no.MonteCarlo.deterministic_identical_stream(), 0) + if no.mpi.RANK == 0: + assert seeds + assert len(seeds) == no.mpi.SIZE + assert len(set(seeds)) == 1 + + # all seeds different but reproducible + seeds = no.mpi.COMM.gather(no.MonteCarlo.deterministic_independent_stream(), 0) + if no.mpi.RANK == 0: + assert seeds + assert len(seeds) == no.mpi.SIZE + assert len(set(seeds)) == len(seeds) + seeds2 = no.mpi.COMM.gather(no.MonteCarlo.deterministic_independent_stream(), 0) + if no.mpi.RANK == 0: + assert seeds == seeds2 + + # all seeds different and not reproducible + seeds = no.mpi.COMM.gather(no.MonteCarlo.nondeterministic_stream(), 0) + if no.mpi.RANK == 0: + assert seeds + assert len(seeds) == no.mpi.SIZE + assert len(set(seeds)) == len(seeds) + # TODO need higher time resolution on seeder + seeds2 = no.mpi.COMM.gather(no.MonteCarlo.nondeterministic_stream(), 0) + if no.mpi.RANK == 0: + assert seeds != seeds2 + + # test custom seeder + def seeder() -> int: + return no.mpi.RANK + 1 + + m = no.Model(no.NoTimeline(), seeder) + assert m.mc.seed() == no.mpi.RANK + 1 def test_sample(base_model: no.Model) -> None: - with pytest.raises(ValueError): - base_model.mc.sample(100, np.array([0.9])) - with pytest.raises(ValueError): - base_model.mc.sample(100, np.array([-0.1, 1.1])) - assert np.all(base_model.mc.sample(100, np.array([1.0, 0.0, 0.0, 0.0])) == 0) - assert np.all(base_model.mc.sample(100, np.array([0.0, 1.0, 0.0, 0.0])) == 1) - assert np.all(base_model.mc.sample(100, np.array([0.0, 0.0, 0.0, 1.0])) == 3) + with pytest.raises(ValueError): + base_model.mc.sample(100, np.array([0.9])) + with pytest.raises(ValueError): + base_model.mc.sample(100, np.array([-0.1, 1.1])) + assert np.all(base_model.mc.sample(100, np.array([1.0, 0.0, 0.0, 0.0])) == 0) + assert np.all(base_model.mc.sample(100, np.array([0.0, 1.0, 0.0, 0.0])) == 1) + assert np.all(base_model.mc.sample(100, np.array([0.0, 0.0, 0.0, 1.0])) == 3) def test_hazard(base_model: no.Model) -> None: - assert np.all(base_model.mc.hazard(0.0,10) == 0.0) - assert np.all(base_model.mc.hazard(1.0,10) == 1.0) - - with pytest.raises(ValueError): - base_model.mc.hazard(-0.1, 10) - with pytest.raises(ValueError): - base_model.mc.hazard(1.1, 10) - with pytest.raises(ValueError): - base_model.mc.hazard(np.array([-0.1, 0.5])) - with pytest.raises(ValueError): - base_model.mc.hazard(np.array([0.1, 1.2])) - with pytest.raises(ValueError): - base_model.mc.hazard(np.nan, 1) - with pytest.raises(ValueError): - base_model.mc.hazard(np.array([0.1, np.nan])) + assert np.all(base_model.mc.hazard(0.0, 10) == 0.0) + assert np.all(base_model.mc.hazard(1.0, 10) == 1.0) + + with pytest.raises(ValueError): + base_model.mc.hazard(-0.1, 10) + with pytest.raises(ValueError): + base_model.mc.hazard(1.1, 10) + with pytest.raises(ValueError): + base_model.mc.hazard(np.array([-0.1, 0.5])) + with pytest.raises(ValueError): + base_model.mc.hazard(np.array([0.1, 1.2])) + with pytest.raises(ValueError): + base_model.mc.hazard(np.nan, 1) + with pytest.raises(ValueError): + base_model.mc.hazard(np.array([0.1, np.nan])) def test_stopping(base_model: no.Model) -> None: - assert np.all(base_model.mc.stopping(0.0, 10) == no.time.FAR_FUTURE) - - with pytest.raises(ValueError): - base_model.mc.stopping(-0.1, 10) - with pytest.raises(ValueError): - base_model.mc.stopping(1.1, 10) - with pytest.raises(ValueError): - base_model.mc.stopping(np.array([-0.1, 0.5])) - with pytest.raises(ValueError): - 
base_model.mc.stopping(np.array([0.1, 1.2]))
-  with pytest.raises(ValueError):
-    base_model.mc.stopping(np.nan, 1)
-  with pytest.raises(ValueError):
-    base_model.mc.stopping(np.array([0.1, np.nan]))
+    assert np.all(base_model.mc.stopping(0.0, 10) == no.time.FAR_FUTURE)
+
+    with pytest.raises(ValueError):
+        base_model.mc.stopping(-0.1, 10)
+    with pytest.raises(ValueError):
+        base_model.mc.stopping(1.1, 10)
+    with pytest.raises(ValueError):
+        base_model.mc.stopping(np.array([-0.1, 0.5]))
+    with pytest.raises(ValueError):
+        base_model.mc.stopping(np.array([0.1, 1.2]))
+    with pytest.raises(ValueError):
+        base_model.mc.stopping(np.nan, 1)
+    with pytest.raises(ValueError):
+        base_model.mc.stopping(np.array([0.1, np.nan]))
+
 
 def test_arrivals_validation(base_model: no.Model) -> None:
-  assert np.all(no.time.isnever(base_model.mc.first_arrival([0.0,0.0], 1.0, 10)))
-  with pytest.raises(ValueError):
-    base_model.mc.first_arrival(np.array([-1.0, 0.0]), 1.0, 10)
-  with pytest.raises(ValueError):
-    base_model.mc.first_arrival([1.0, np.nan], 1.0, 10)
-
-  assert np.all(no.time.isnever(base_model.mc.next_arrival(np.zeros(10), [0.0, 0.0], 1.0)))
-  with pytest.raises(ValueError):
-    base_model.mc.next_arrival(np.zeros(10), [-1.0, 0.0], 1.0)
-  with pytest.raises(ValueError):
-    base_model.mc.next_arrival(np.zeros(10), [np.nan, np.nan], 1.0)
-
-  with pytest.raises(ValueError):
-    base_model.mc.arrivals([-1.0, 0.0], 1.0, 10, 0.0)
-  with pytest.raises(ValueError):
-    base_model.mc.arrivals([1.0, 1.0], 1.0, 10, 0.0)
-  with pytest.raises(ValueError):
-    base_model.mc.arrivals([np.nan, np.nan], 1.0, 10, 0.0)
+    assert np.all(no.time.isnever(base_model.mc.first_arrival([0.0, 0.0], 1.0, 10)))
+    with pytest.raises(ValueError):
+        base_model.mc.first_arrival(np.array([-1.0, 0.0]), 1.0, 10)
+    with pytest.raises(ValueError):
+        base_model.mc.first_arrival([1.0, np.nan], 1.0, 10)
+
+    assert np.all(
+        no.time.isnever(base_model.mc.next_arrival(np.zeros(10), [0.0, 0.0], 1.0))
+    )
+    with pytest.raises(ValueError):
+        base_model.mc.next_arrival(np.zeros(10), [-1.0, 0.0], 1.0)
+    with pytest.raises(ValueError):
+        base_model.mc.next_arrival(np.zeros(10), [np.nan, np.nan], 1.0)
+
+    with pytest.raises(ValueError):
+        base_model.mc.arrivals([-1.0, 0.0], 1.0, 10, 0.0)
+    with pytest.raises(ValueError):
+        base_model.mc.arrivals([1.0, 1.0], 1.0, 10, 0.0)
+    with pytest.raises(ValueError):
+        base_model.mc.arrivals([np.nan, np.nan], 1.0, 10, 0.0)
 
 
 def test_mc_counts(base_model: no.Model) -> None:
-  mc = base_model.mc
-  assert mc.seed() == 19937
-
-  def poisson_pdf(x: range, l: float) -> np.ndarray:
-    y = np.exp(-l)
-    return np.array([l**k * y / np.math.factorial(k) for k in x]) # type: ignore # Module has no attribute "math"; maybe "emath" or "mat"?
+    mc = base_model.mc
+    assert mc.seed() == 19937
+
+    def poisson_pdf(x: range, lambda_: float) -> np.ndarray:
+        y = np.exp(-lambda_)
+        return np.array([lambda_**k * y / factorial(k) for k in x])
- for lam, dt, n in tests: + tests = [ + (1.0, 1.0, 10000), + (3.0, 0.5, 10000), + (0.2, 2.0, 10000), + (10.0, 1.0, 1000), + (3.0, 1.0, 100000), + ] - c = mc.counts([lam] * n, dt) - x = range(0, max(c)) - # convert to counts - c1 = [(c == k).sum() / n for k in x] - p = poisson_pdf(x, lam * dt) + for lam, dt, n in tests: + c = mc.counts([lam] * n, dt) + x = range(0, max(c)) + # convert to counts + c1 = [(c == k).sum() / n for k in x] + p = poisson_pdf(x, lam * dt) - for i in x: - assert np.fabs(c1[i] - p[i]) < 1.0 / np.sqrt(n) + for i in x: + assert np.fabs(c1[i] - p[i]) < 1.0 / np.sqrt(n) def test_mc_serial(base_model: no.Model) -> None: - - if no.mpi.SIZE != 1: - return - - mc = base_model.mc - assert mc.seed() == 19937 - - mc.reset() - assert mc.raw() == 6231104047474287856 - assert mc.raw() == 14999272868227999252 - mc.reset() - assert mc.raw() == 6231104047474287856 - assert mc.raw() == 14999272868227999252 - - mc.reset() - s = mc.state() - a = mc.ustream(5) - assert s != mc.state() - assert abs(a[0] - 0.33778882725164294) < 1e-8 - assert abs(a[1] - 0.04767065867781639) < 1e-8 - assert abs(a[2] - 0.8131122114136815) < 1e-8 - assert abs(a[3] - 0.24954832065850496) < 1e-8 - assert abs(a[4] - 0.3385562978219241) < 1e-8 - - mc.reset() - assert s == mc.state() - h = mc.hazard(0.5, 1000000) - assert np.sum(h) == 500151 - - n = 10000 - # 10% constant hazard for 10 time units, followed by zero - dt = 1.0 - p = np.full(11, 0.1) - p[-1] = 0 - a = mc.first_arrival(p, dt, n) # type: ignore[assignment] - assert np.nanmin(a) > 0.0 - assert np.nanmax(a) < 10.0 - no.log("%f - %f" % (np.nanmin(a), np.nanmax(a))) - - # now set a to all 8.0 - a = np.full(n, 8.0) - # next arrivals (absolute) only in range 8-10, if they happen - b = mc.next_arrival(a, p, dt) - assert np.nanmin(b) > 8.0 - assert np.nanmax(b) < 10.0 - - # next arrivals with gap dt (absolute) only in range 9-10, if they happen - b = mc.next_arrival(a, p, dt, False, dt) - assert np.nanmin(b) > 9.0 - assert np.nanmax(b) < 10.0 - - # next arrivals (relative) only in range 8-18, if they happen - b = mc.next_arrival(a, p, dt, True) - assert np.nanmin(b) > 8.0 - assert np.nanmax(b) < 18.0 - - # next arrivals with gap dt (relative) only in range 9-19, if they happen - b = mc.next_arrival(a, p, dt, True, dt) - assert np.nanmin(b) > 9.0 - assert np.nanmax(b) < 19.0 - - # now set a back to random arrivals - a = mc.first_arrival(p, dt, n) # type: ignore[assignment] - # next arrivals (absolute) only in range (min(a), 10), if they happen - b = mc.next_arrival(a, p, dt) - assert np.nanmin(b) > np.nanmin(a) - assert np.nanmax(b) < 10.0 - - # next arrivals with gap dt (absolute) only in range (min(a)+dt, 10), if they happen - b = mc.next_arrival(a, p, dt, False, dt) - assert np.nanmin(b) > np.nanmin(a) + dt - assert np.nanmax(b) < 10.0 - - # next arrivals (relative) only in range (min(a), max(a)+10), if they happen - b = mc.next_arrival(a, p, dt, True) - assert np.nanmin(b) > np.nanmin(a) - assert np.nanmax(b) < np.nanmax(a) + 10.0 - - # next arrivals with gap dt (relative) only in range (min(a)+dt, max(a)+dt+10), if they happen - b = mc.next_arrival(a, p, dt, True, dt) - assert np.nanmin(b) > np.nanmin(a) + dt - assert np.nanmax(b) < np.nanmax(a) + dt + 10.0 - - mc.reset() - a = mc.first_arrival(np.array([0.1, 0.2, 0.3]), 1.0, 6, 0.0) # type: ignore[assignment] - assert len(a) == 6 - # only works for single-process - assert a[0] == 3.6177811673165667 - assert a[1] == 0.6896205251312125 - assert a[2] == 3.610216282947799 - assert a[3] == 7.883336832344425 - 
assert a[4] == 6.461894711350323 - assert a[5] == 2.8566436418145944 - - mc.reset() - a = mc.arrivals([1.0, 2.0, 3.0, 0.0], 1.0, 1, 0.0) - assert np.allclose(a[0], [0.361778116731657, 0.430740169244778, 1.580095480774, 2.226284951909032, 2.511949316090492, 2.809348320658414, 2.929632529913839]) - mc.reset() - # now with a mim separation of 1.0 - a = mc.arrivals([1.0, 2.0, 3.0, 0.0], 1.0, 1, 1.0) - assert np.allclose(a[0], [0.361778116731657, 1.430740169244778]) - mc.reset() - - # Exp.value = p +/- 1/sqrt(N) - h = base_model.mc.hazard(0.2, 10000) - assert isinstance(h, np.ndarray) - assert len(h) == 10000 - assert abs(np.mean(h) - 0.2) < 0.01 - - hv = base_model.mc.hazard(np.array([0.1, 0.2, 0.3, 0.4, 0.5])) - assert isinstance(hv, np.ndarray) - assert len(hv) == 5 - - # Exp.value = 1/p +/- 1/sqrt(N) - st = base_model.mc.stopping(0.1, 10000) - assert isinstance(st, np.ndarray) - assert len(st) == 10000 - assert abs(np.mean(st)/10 - 1.0) < 0.03 - - sv = base_model.mc.stopping(np.array([0.1, 0.2, 0.3, 0.4, 0.5])) - assert isinstance(sv, np.ndarray) - assert len(sv) == 5 - - # Non-homogeneous Poisson process (time-dependent hazard) - nhpp = base_model.mc.first_arrival(np.array([0.1, 0.2, 0.3, 0.4, 0.5]), 1.0, 10, 0.0) - assert isinstance(nhpp, np.ndarray) - assert len(nhpp) == 10 + if no.mpi.SIZE != 1: + return + + mc = base_model.mc + assert mc.seed() == 19937 + + mc.reset() + assert mc.raw() == 6231104047474287856 + assert mc.raw() == 14999272868227999252 + mc.reset() + assert mc.raw() == 6231104047474287856 + assert mc.raw() == 14999272868227999252 + + mc.reset() + s = mc.state() + a = mc.ustream(5) + assert s != mc.state() + assert abs(a[0] - 0.33778882725164294) < 1e-8 + assert abs(a[1] - 0.04767065867781639) < 1e-8 + assert abs(a[2] - 0.8131122114136815) < 1e-8 + assert abs(a[3] - 0.24954832065850496) < 1e-8 + assert abs(a[4] - 0.3385562978219241) < 1e-8 + + mc.reset() + assert s == mc.state() + h = mc.hazard(0.5, 1000000) + assert np.sum(h) == 500151 + + n = 10000 + # 10% constant hazard for 10 time units, followed by zero + dt = 1.0 + p = np.full(11, 0.1) + p[-1] = 0 + a = mc.first_arrival(p, dt, n) # type: ignore[assignment] + assert np.nanmin(a) > 0.0 + assert np.nanmax(a) < 10.0 + no.log("%f - %f" % (np.nanmin(a), np.nanmax(a))) + + # now set a to all 8.0 + a = np.full(n, 8.0) + # next arrivals (absolute) only in range 8-10, if they happen + b = mc.next_arrival(a, p, dt) + assert np.nanmin(b) > 8.0 + assert np.nanmax(b) < 10.0 + + # next arrivals with gap dt (absolute) only in range 9-10, if they happen + b = mc.next_arrival(a, p, dt, False, dt) + assert np.nanmin(b) > 9.0 + assert np.nanmax(b) < 10.0 + + # next arrivals (relative) only in range 8-18, if they happen + b = mc.next_arrival(a, p, dt, True) + assert np.nanmin(b) > 8.0 + assert np.nanmax(b) < 18.0 + + # next arrivals with gap dt (relative) only in range 9-19, if they happen + b = mc.next_arrival(a, p, dt, True, dt) + assert np.nanmin(b) > 9.0 + assert np.nanmax(b) < 19.0 + + # now set a back to random arrivals + a = mc.first_arrival(p, dt, n) # type: ignore[assignment] + # next arrivals (absolute) only in range (min(a), 10), if they happen + b = mc.next_arrival(a, p, dt) + assert np.nanmin(b) > np.nanmin(a) + assert np.nanmax(b) < 10.0 + + # next arrivals with gap dt (absolute) only in range (min(a)+dt, 10), if they happen + b = mc.next_arrival(a, p, dt, False, dt) + assert np.nanmin(b) > np.nanmin(a) + dt + assert np.nanmax(b) < 10.0 + + # next arrivals (relative) only in range (min(a), max(a)+10), if they happen + b = 
mc.next_arrival(a, p, dt, True) + assert np.nanmin(b) > np.nanmin(a) + assert np.nanmax(b) < np.nanmax(a) + 10.0 + + # next arrivals with gap dt (relative) only in range (min(a)+dt, max(a)+dt+10), if they happen + b = mc.next_arrival(a, p, dt, True, dt) + assert np.nanmin(b) > np.nanmin(a) + dt + assert np.nanmax(b) < np.nanmax(a) + dt + 10.0 + + mc.reset() + a = mc.first_arrival(np.array([0.1, 0.2, 0.3]), 1.0, 6, 0.0) # type: ignore[assignment] + assert len(a) == 6 + # only works for single-process + assert a[0] == 3.6177811673165667 + assert a[1] == 0.6896205251312125 + assert a[2] == 3.610216282947799 + assert a[3] == 7.883336832344425 + assert a[4] == 6.461894711350323 + assert a[5] == 2.8566436418145944 + + mc.reset() + a = mc.arrivals([1.0, 2.0, 3.0, 0.0], 1.0, 1, 0.0) + assert np.allclose( + a[0], + [ + 0.361778116731657, + 0.430740169244778, + 1.580095480774, + 2.226284951909032, + 2.511949316090492, + 2.809348320658414, + 2.929632529913839, + ], + ) + mc.reset() + # now with a mim separation of 1.0 + a = mc.arrivals([1.0, 2.0, 3.0, 0.0], 1.0, 1, 1.0) + assert np.allclose(a[0], [0.361778116731657, 1.430740169244778]) + mc.reset() + + # Exp.value = p +/- 1/sqrt(N) + h = base_model.mc.hazard(0.2, 10000) + assert isinstance(h, np.ndarray) + assert len(h) == 10000 + assert abs(np.mean(h) - 0.2) < 0.01 + + hv = base_model.mc.hazard(np.array([0.1, 0.2, 0.3, 0.4, 0.5])) + assert isinstance(hv, np.ndarray) + assert len(hv) == 5 + + # Exp.value = 1/p +/- 1/sqrt(N) + st = base_model.mc.stopping(0.1, 10000) + assert isinstance(st, np.ndarray) + assert len(st) == 10000 + assert abs(np.mean(st) / 10 - 1.0) < 0.03 + + sv = base_model.mc.stopping(np.array([0.1, 0.2, 0.3, 0.4, 0.5])) + assert isinstance(sv, np.ndarray) + assert len(sv) == 5 + + # Non-homogeneous Poisson process (time-dependent hazard) + nhpp = base_model.mc.first_arrival( + np.array([0.1, 0.2, 0.3, 0.4, 0.5]), 1.0, 10, 0.0 + ) + assert isinstance(nhpp, np.ndarray) + assert len(nhpp) == 10 def test_mc_parallel(base_model: no.Model, base_indep_model: no.Model) -> None: + if no.mpi.SIZE == 1: + return - if no.mpi.SIZE == 1: - return - - # test model has identical streams - mc = base_model.mc - mc.reset() - assert mc.seed() == 19937 + # test model has identical streams + mc = base_model.mc + mc.reset() + assert mc.seed() == 19937 - a = mc.ustream(5) - all_a = no.mpi.COMM.gather(a, root=0) - all_states = no.mpi.COMM.gather(mc.state(), root=0) + a = mc.ustream(5) + all_a = no.mpi.COMM.gather(a, root=0) + all_states = no.mpi.COMM.gather(mc.state(), root=0) - if no.mpi.RANK == 0: - assert all_a and all_states - for r in range(0, no.mpi.SIZE): - assert np.all(all_states[0] == all_states[r]) - assert np.all(a - all_a[r] == 0.0) + if no.mpi.RANK == 0: + assert all_a and all_states + for r in range(0, no.mpi.SIZE): + assert np.all(all_states[0] == all_states[r]) + assert np.all(a - all_a[r] == 0.0) - # test model_i has independent streams - mc = base_indep_model.mc - mc.reset() - assert mc.seed() == 19937 + no.mpi.RANK + # test model_i has independent streams + mc = base_indep_model.mc + mc.reset() + assert mc.seed() == 19937 + no.mpi.RANK - a = mc.ustream(5) - all_a = no.mpi.COMM.gather(a, root=0) - all_states = no.mpi.COMM.gather(mc.state(), root=0) + a = mc.ustream(5) + all_a = no.mpi.COMM.gather(a, root=0) + all_states = no.mpi.COMM.gather(mc.state(), root=0) - # check all other streams different - if no.mpi.RANK == 0: - assert all_a and all_states - for r in range(1, no.mpi.SIZE): - assert not np.all(a - all_a[r] == 0.0) + # check all 
other streams different + if no.mpi.RANK == 0: + assert all_a and all_states + for r in range(1, no.mpi.SIZE): + assert not np.all(a - all_a[r] == 0.0) def test_bitgen(base_model: no.Model) -> None: - base_model2 = no.Model(no.NoTimeline(), no.MonteCarlo.deterministic_identical_stream) - gen = no.as_np(base_model.mc) - - n = gen.bit_generator.random_raw() - assert n == base_model2.mc.raw() - assert (gen.uniform(size=100) == base_model2.mc.ustream(100)).all() - - # check the np gen gets the reset - base_model.mc.reset() - assert n == gen.bit_generator.random_raw() - - base_model.mc.reset() - base_model_different_seed = no.Model(no.NoTimeline(), lambda: 1234) - gen2 = no.as_np(base_model_different_seed.mc) - assert gen2.bit_generator.random_raw() != base_model.mc.raw() - assert (gen2.uniform(size=100) != base_model.mc.ustream(100)).all() + base_model2 = no.Model( + no.NoTimeline(), no.MonteCarlo.deterministic_identical_stream + ) + gen = no.as_np(base_model.mc) + + n = gen.bit_generator.random_raw() + assert n == base_model2.mc.raw() + assert (gen.uniform(size=100) == base_model2.mc.ustream(100)).all() + + # check the np gen gets the reset + base_model.mc.reset() + assert n == gen.bit_generator.random_raw() + + base_model.mc.reset() + base_model_different_seed = no.Model(no.NoTimeline(), lambda: 1234) + gen2 = no.as_np(base_model_different_seed.mc) + assert gen2.bit_generator.random_raw() != base_model.mc.raw() + assert (gen2.uniform(size=100) != base_model.mc.ustream(100)).all() diff --git a/test/test_model.py b/test/test_model.py index b7c7cce8..b7735626 100644 --- a/test/test_model.py +++ b/test/test_model.py @@ -1,57 +1,95 @@ import pytest -import neworder as no +import neworder as no def test_base(base_model: no.Model) -> None: - with pytest.raises(RuntimeError): - no.run(base_model) # RuntimeError: Tried to call pure virtual function "Model::step" + with pytest.raises(RuntimeError): + no.run( + base_model + ) # RuntimeError: Tried to call pure virtual function "Model::step" def test_base_not_initialised() -> None: - class TestModel(no.Model): - def __init__(self) -> None: - pass - with pytest.raises(TypeError): - m = TestModel() + class TestModel(no.Model): + def __init__(self) -> None: + pass + + with pytest.raises(TypeError): + _ = TestModel() def test_default_seeder() -> None: - class DefaultModel(no.Model): - def __init__(self) -> None: - super().__init__(no.NoTimeline()) - self.x = self.mc.raw() + class DefaultModel(no.Model): + def __init__(self) -> None: + super().__init__(no.NoTimeline()) + self.x = self.mc.raw() - class ExplicitModel(no.Model): - def __init__(self) -> None: - super().__init__(no.NoTimeline(), no.MonteCarlo.deterministic_independent_stream) - self.x = self.mc.raw() + class ExplicitModel(no.Model): + def __init__(self) -> None: + super().__init__( + no.NoTimeline(), no.MonteCarlo.deterministic_independent_stream + ) + self.x = self.mc.raw() - class DifferentModel(no.Model): - def __init__(self) -> None: - super().__init__(no.NoTimeline(), lambda: 42) - self.x = self.mc.raw() + class DifferentModel(no.Model): + def __init__(self) -> None: + super().__init__(no.NoTimeline(), lambda: 42) + self.x = self.mc.raw() - assert DefaultModel().x == ExplicitModel().x - assert DefaultModel().x != DifferentModel().x + assert DefaultModel().x == ExplicitModel().x + assert DefaultModel().x != DifferentModel().x def test_multimodel() -> None: - - class TestModel(no.Model): - def __init__(self) -> None: - super().__init__(no.LinearTimeline(0, 10, 10), 
no.MonteCarlo.deterministic_identical_stream) - - self.x = 0.0 - - def step(self) -> None: - self.x += self.mc.ustream(1)[0] - - def finalise(self) -> None: - no.log(self.x) - - models = [TestModel(), TestModel()] - - [no.run(m) for m in models] - - assert models[0].x == models[1].x + class TestModel(no.Model): + def __init__(self) -> None: + super().__init__( + no.LinearTimeline(0, 10, 10), + no.MonteCarlo.deterministic_identical_stream, + ) + self.x = 0.0 + + def step(self) -> None: + self.x += self.mc.ustream(1)[0] + + def finalise(self) -> None: + no.log(self.x) + + models = [TestModel(), TestModel()] + + [no.run(m) for m in models] + + assert models[0].x == models[1].x + + +def test_runstate() -> None: + class TestModel(no.Model): + def __init__(self, *, do_halt: bool) -> None: + super().__init__(no.NoTimeline()) + self.do_halt = do_halt + self.finalised = False + + def step(self) -> None: + assert self.run_state == no.Model.RUNNING + if self.do_halt: + self.halt() + assert self.run_state == no.Model.HALTED + + def finalise(self) -> None: + assert self.run_state == no.Model.COMPLETED + self.finalised = True + + # run without halting + m = TestModel(do_halt=False) + assert m.run_state == no.Model.NOT_STARTED + no.run(m) + assert m.run_state == no.Model.COMPLETED + assert m.finalised + + # halt the run + m = TestModel(do_halt=True) + assert m.run_state == no.Model.NOT_STARTED + no.run(m) + assert m.run_state == no.Model.HALTED + assert not m.finalised diff --git a/test/test_module.py b/test/test_module.py index 95b6c792..dfeda39c 100644 --- a/test/test_module.py +++ b/test/test_module.py @@ -1,74 +1,77 @@ +import importlib +import warnings + import pytest -import warnings import neworder as no -warnings.filterwarnings(action='ignore', category=RuntimeWarning, message=r't=') +warnings.filterwarnings(action="ignore", category=RuntimeWarning, message=r"t=") def test_basics() -> None: - # just check you can read the attrs/call the functions - assert hasattr(no, "verbose") - assert hasattr(no, "checked") - assert hasattr(no, "__version__") - no.log("testing") - no.log(1) - no.log(no) - no.log([1, 2, 3]) - no.log((1, 2, 3)) - no.log({1: 2, 3:4}) + # just check you can read the attrs/call the functions + assert hasattr(no, "verbose") + assert hasattr(no, "checked") + assert hasattr(no, "__version__") + no.log("testing") + no.log(1) + no.log(no) + no.log([1, 2, 3]) + no.log((1, 2, 3)) + no.log({1: 2, 3: 4}) def test_submodules() -> None: - assert(hasattr(no, "mpi")) - assert(hasattr(no, "stats")) - assert(hasattr(no, "df")) + assert hasattr(no, "mpi") + assert hasattr(no, "stats") + assert hasattr(no, "df") def test_dummy_model() -> None: - class DummyModel(no.Model): - def __init__(self) -> None: - super().__init__(no.NoTimeline(), no.MonteCarlo.deterministic_identical_stream) + class DummyModel(no.Model): + def __init__(self) -> None: + super().__init__( + no.NoTimeline(), no.MonteCarlo.deterministic_identical_stream + ) - def step(self) -> None: - pass + def step(self) -> None: + pass - def finalise(self) -> None: - pass + def finalise(self) -> None: + pass + + assert no.run(DummyModel()) - assert no.run(DummyModel()) @pytest.mark.filterwarnings("ignore:check()") def test_check_flag() -> None: - class FailingModel(no.Model): - def __init__(self) -> None: - super().__init__(no.NoTimeline(), no.MonteCarlo.deterministic_identical_stream) + class FailingModel(no.Model): + def __init__(self) -> None: + super().__init__( + no.NoTimeline(), no.MonteCarlo.deterministic_identical_stream + ) - def 
step(self) -> None: - pass + def step(self) -> None: + pass - def check(self) -> bool: - return False + def check(self) -> bool: + return False - # fails - assert not no.run(FailingModel()) + # fails + assert not no.run(FailingModel()) - no.checked(False) - # succeeds - assert no.run(FailingModel()) + no.checked(False) + # succeeds + assert no.run(FailingModel()) def test_mpi() -> None: - # if no mpi4py, assume serial like module does - try: - import mpi4py.MPI as mpi # type: ignore[import] - except ImportError: - assert not no.mpi.COMM - assert no.mpi.RANK == 0 - assert no.mpi.SIZE == 1 - else: - assert no.mpi.COMM - assert no.mpi.RANK == no.mpi.COMM.Get_rank() - assert no.mpi.SIZE == no.mpi.COMM.Get_size() - - + # if no mpi4py, assume serial like module does + if importlib.util.find_spec("mpi4py") is None: + assert not no.mpi.COMM + assert no.mpi.RANK == 0 + assert no.mpi.SIZE == 1 + else: + assert no.mpi.COMM + assert no.mpi.RANK == no.mpi.COMM.Get_rank() + assert no.mpi.SIZE == no.mpi.COMM.Get_size() diff --git a/test/test_mpi.py b/test/test_mpi.py index 666de2f1..fae0f36c 100644 --- a/test/test_mpi.py +++ b/test/test_mpi.py @@ -1,83 +1,84 @@ -""" MPI tests """ +"""MPI tests""" from typing import Any + import numpy as np import pandas as pd + import neworder as no if no.mpi.SIZE == 1: - no.log("Not in parallel mode, skipping MPI tests") + no.log("Not in parallel mode, skipping MPI tests") else: - no.log("Parallel mode enabled, running MPI tests") - - def send_recv(x: Any) -> bool: - if no.mpi.RANK == 0: - no.mpi.COMM.send(x, dest=1) - if no.mpi.RANK == 1: - y = no.mpi.COMM.recv(source=0) - no.log("MPI: 0 sent {}={} 1 recd {}={}".format(type(x), x, type(y), y)) - if y != x: - return False - return True - - def test_scalar() -> None: - - assert send_recv(True) - assert send_recv(10) - assert send_recv(10.01) - assert send_recv("abcdef") - assert send_recv([1,2,3]) - assert send_recv({"a": "fghdfkgh"}) + no.log("Parallel mode enabled, running MPI tests") - def test_arrays() -> None: + def send_recv(x: Any) -> bool: + if no.mpi.RANK == 0: + no.mpi.COMM.send(x, dest=1) + if no.mpi.RANK == 1: + y = no.mpi.COMM.recv(source=0) + no.log("MPI: 0 sent {}={} 1 recd {}={}".format(type(x), x, type(y), y)) + if y != x: + return False + return True - x = np.array([1,4,9,16]) - if no.mpi.RANK == 0: - no.mpi.COMM.send(x, dest=1) - if no.mpi.RANK == 1: - y = no.mpi.COMM.recv(source=0) - assert np.array_equal(x,y) + def test_scalar() -> None: + assert send_recv(True) + assert send_recv(10) + assert send_recv(10.01) + assert send_recv("abcdef") + assert send_recv([1, 2, 3]) + assert send_recv({"a": "fghdfkgh"}) - df = pd.read_csv("./test/df2.csv") - if no.mpi.RANK == 0: - no.mpi.COMM.send(df, dest=1) - if no.mpi.RANK == 1: - dfrec = no.mpi.COMM.recv(source=0) - assert dfrec.equals(df) + def test_arrays() -> None: + x = np.array([1, 4, 9, 16]) + if no.mpi.RANK == 0: + no.mpi.COMM.send(x, dest=1) + if no.mpi.RANK == 1: + y = no.mpi.COMM.recv(source=0) + assert np.array_equal(x, y) - i = "rank %d" % no.mpi.RANK - root = 0 - i = no.mpi.COMM.bcast(i, root=root) - # all procs should now have root process value - assert i == "rank 0" + df = pd.read_csv("./test/df2.csv") + if no.mpi.RANK == 0: + no.mpi.COMM.send(df, dest=1) + if no.mpi.RANK == 1: + dfrec = no.mpi.COMM.recv(source=0) + assert dfrec.equals(df) - # a0 will be different for each proc - a0 = np.random.rand(2,2) - a1 = no.mpi.COMM.bcast(a0, root) - # a1 will equal a0 on rank 0 only - if no.mpi.RANK == 0: - assert np.array_equal(a0, a1) - else: - assert 
not np.array_equal(a0, a1) + i = "rank %d" % no.mpi.RANK + root = 0 + i = no.mpi.COMM.bcast(i, root=root) + # all procs should now have root process value + assert i == "rank 0" - # base model for MC engine - model = no.Model(no.NoTimeline(), no.MonteCarlo.deterministic_identical_stream) + # a0 will be different for each proc + a0 = np.random.rand(2, 2) + a1 = no.mpi.COMM.bcast(a0, root) + # a1 will equal a0 on rank 0 only + if no.mpi.RANK == 0: + assert np.array_equal(a0, a1) + else: + assert not np.array_equal(a0, a1) - # # check identical streams (independent=False) - u = model.mc.ustream(1000) - v = no.mpi.COMM.bcast(u, root=root) - # u == v on all processes - assert np.array_equal(u, v) + # base model for MC engine + model = no.Model(no.NoTimeline(), no.MonteCarlo.deterministic_identical_stream) - # base model for MC engine - model = no.Model(no.NoTimeline(), no.MonteCarlo.deterministic_independent_stream) + # check identical streams (independent=False) + u = model.mc.ustream(1000) + v = no.mpi.COMM.bcast(u, root=root) + # u == v on all processes + assert np.array_equal(u, v) - # # check identical streams (independent=False) - u = model.mc.ustream(1000) - v = no.mpi.COMM.bcast(u, root=root) - # u != v on all non-root processes - if no.mpi.RANK != root: - assert not np.array_equal(u, v) - else: - assert np.array_equal(u, v) + # base model for MC engine + model = no.Model( + no.NoTimeline(), no.MonteCarlo.deterministic_independent_stream + ) + # check independent streams + u = model.mc.ustream(1000) + v = no.mpi.COMM.bcast(u, root=root) + # u != v on all non-root processes + if no.mpi.RANK != root: + assert not np.array_equal(u, v) + else: + assert np.array_equal(u, v) diff --git a/test/test_stats.py b/test/test_stats.py index 9b571da7..917f1ec1 100644 --- a/test/test_stats.py +++ b/test/test_stats.py @@ -1,20 +1,19 @@ - import numpy as np + import neworder as no -import pytest -def test_logistic_logit() -> None: - n = 100 # wont work if odd! +def test_logistic_logit() -> None: + n = 100 # won't work if odd! 
- x = np.linspace(-10.0, 10.0, n+1) - y = no.stats.logistic(x) - assert np.all(y >= -1) - assert np.all(y <= 1) - assert y[n//2] == 0.5 + x = np.linspace(-10.0, 10.0, n + 1) + y = no.stats.logistic(x) + assert np.all(y >= -1) + assert np.all(y <= 1) + assert y[n // 2] == 0.5 - assert np.all(np.fabs(y + y[::-1] - 1.0) < 1e-15) + assert np.all(np.fabs(y + y[::-1] - 1.0) < 1e-15) - x2 = no.stats.logit(y) + x2 = no.stats.logit(y) - assert np.all(np.fabs(x2 - x) < 2e-12) \ No newline at end of file + assert np.all(np.fabs(x2 - x) < 2e-12) diff --git a/test/test_timeline.py b/test/test_timeline.py index e08438f5..0d348d88 100644 --- a/test/test_timeline.py +++ b/test/test_timeline.py @@ -1,371 +1,398 @@ +from datetime import date, datetime from typing import cast -from datetime import datetime, date -import pytest + import numpy as np +import pytest + import neworder as no class _TestModel(no.Model): - def __init__(self) -> None: - # 10 steps of 10 - super().__init__(no.LinearTimeline(0,100,10), no.MonteCarlo.deterministic_identical_stream) + def __init__(self) -> None: + # 10 steps of 10 + super().__init__( + no.LinearTimeline(0, 100, 10), no.MonteCarlo.deterministic_identical_stream + ) - self.step_count = 0 - self.t_end = 100 - self.i_end = 10 + self.step_count = 0 + self.t_end = 100 + self.i_end = 10 - def step(self) -> None: - self.step_count += 1 + def step(self) -> None: + self.step_count += 1 - def finalise(self) -> None: - assert self.timeline.time == self.t_end and self.timeline.index == self.timeline.index + def finalise(self) -> None: + assert ( + self.timeline.time == self.t_end + and self.timeline.index == self.i_end + ) class _TestModel2(no.Model): - def __init__(self, start: float, end: float, steps: int) -> None: - super().__init__(no.LinearTimeline(start, end, steps), no.MonteCarlo.deterministic_identical_stream) + def __init__(self, start: float, end: float, steps: int) -> None: + super().__init__( + no.LinearTimeline(start, end, steps), + no.MonteCarlo.deterministic_identical_stream, + ) - self.i = 0 - self.t = start - self.steps = steps - self.end = end + self.i = 0 + self.t = start + self.steps = steps + self.end = end - def step(self) -> None: - self.i += 1 - self.t += self.timeline.dt + def step(self) -> None: + self.i += 1 + self.t += self.timeline.dt + + def check(self) -> bool: + return self.timeline.index == self.i and self.timeline.time == self.t - def check(self) -> bool: - return self.timeline.index == self.i and self.timeline.time == self.t + def finalise(self) -> None: + assert self.timeline.at_end and self.timeline.index == self.steps - def finalise(self) -> None: - assert self.timeline.at_end and self.timeline.index == self.steps class _TestResume(no.Model): - def __init__(self, t0: float, n: int) -> None: - super().__init__(no.LinearTimeline(t0, t0 + n, n), no.MonteCarlo.deterministic_identical_stream) + def __init__(self, t0: float, n: int) -> None: + super().__init__( + no.LinearTimeline(t0, t0 + n, n), + no.MonteCarlo.deterministic_identical_stream, + ) - def step(self) -> None: - self.halt() + def step(self) -> None: + self.halt() class CustomTimeline(no.Timeline): - def __init__(self) -> None: - # NB base class takes care of index - super().__init__() - self.t = 1.0 + def __init__(self) -> None: + # NB base class takes care of index + super().__init__() + self.t = 1.0 - @property - def start(self) -> float: - return 1.0 + @property + def start(self) -> float: + return 1.0 - @property - def end(self) -> float: - return 0.0 + @property + def 
end(self) -> float: + return 0.0 - @property - def nsteps(self) -> int: - return -1 + @property + def nsteps(self) -> int: + return -1 - @property - def time(self) -> float: - return 1.0 - self.t + @property + def time(self) -> float: + return 1.0 - self.t - @property - def dt(self) -> float: - return self.t / 2 + @property + def dt(self) -> float: + return self.t / 2 - def _next(self) -> None: - self.t /= 2 + def _next(self) -> None: + self.t /= 2 - @property - def at_end(self) -> bool: - return False + @property + def at_end(self) -> bool: + return False class CustomTimelineModel(no.Model): - def __init__(self) -> None: - super().__init__(CustomTimeline(), no.MonteCarlo.deterministic_identical_stream) + def __init__(self) -> None: + super().__init__(CustomTimeline(), no.MonteCarlo.deterministic_identical_stream) - def step(self) -> None: - self.halt() + def step(self) -> None: + self.halt() def test_timeline_properties() -> None: - n = no.NoTimeline() - assert n.index == 0 - assert np.isnan(n.start) # type: ignore[call-overload] - assert np.isnan(n.time) # type: ignore[call-overload] - assert np.isnan(n.end) # type: ignore[call-overload] - assert n.dt == 0.0 - assert n.nsteps == 1 - - - with pytest.raises(AttributeError): - n.index = 3 # type: ignore[misc] - with pytest.raises(AttributeError): - n.next() # type: ignore[attr-defined] - c = CustomTimeline() - with pytest.raises(AttributeError): - c.index = 3 # type: ignore[misc] - # for python implementations next must be exposed - # with pytest.raises(AttributeError): - # c.next() + n = no.NoTimeline() + assert n.index == 0 + assert np.isnan(n.start) # type: ignore[call-overload] + assert np.isnan(n.time) # type: ignore[call-overload] + assert np.isnan(n.end) # type: ignore[call-overload] + assert n.dt == 0.0 + assert n.nsteps == 1 + + with pytest.raises(AttributeError): + n.index = 3 # type: ignore[misc] + with pytest.raises(AttributeError): + n.next() # type: ignore[attr-defined] + c = CustomTimeline() + with pytest.raises(AttributeError): + c.index = 3 # type: ignore[misc] + # for python implementations next must be exposed + # with pytest.raises(AttributeError): + # c.next() def test_custom_timeline() -> None: - ct = CustomTimeline() - # default __repr__ - assert str(ct) == "" - m = CustomTimelineModel() - assert no.run(m) - assert str(m.timeline) == "" + ct = CustomTimeline() + # default __repr__ + assert str(ct) == "" + m = CustomTimelineModel() + assert no.run(m) + assert str(m.timeline) == "" def test_time() -> None: - t = -1e10 - assert no.time.DISTANT_PAST < t - assert no.time.FAR_FUTURE > t - t = 1e10 - assert no.time.DISTANT_PAST < t - assert no.time.FAR_FUTURE > t - - # dreams never end - assert no.time.NEVER != no.time.NEVER - assert no.time.NEVER != t - assert not no.time.NEVER < t - assert not no.time.NEVER == t - assert not no.time.NEVER >= t - # no nay never - assert not no.time.isnever(t) - # no nay never no more - assert no.time.isnever(no.time.NEVER) + t = -1e10 + assert no.time.DISTANT_PAST < t + assert no.time.FAR_FUTURE > t + t = 1e10 + assert no.time.DISTANT_PAST < t + assert no.time.FAR_FUTURE > t + + # dreams never end + assert no.time.NEVER != no.time.NEVER + assert no.time.NEVER != t + assert not no.time.NEVER < t + assert not no.time.NEVER == t + assert not no.time.NEVER >= t + # no nay never + assert not no.time.isnever(t) + # no nay never no more + assert no.time.isnever(no.time.NEVER) def test_null_timeline() -> None: - t0 = no.NoTimeline() - assert t0.nsteps == 1 - assert t0.dt == 0.0 - assert not 
t0.at_end - assert t0.index == 0 - assert no.time.isnever(t0.time) # type: ignore[call-overload] - assert no.time.isnever(t0.end) # type: ignore[call-overload] - - m = _TestModel2(0, 1, 1) - no.run(m) - assert m.timeline.at_end - assert m.timeline.index == 1 - assert m.timeline.time == 1.0 + t0 = no.NoTimeline() + assert t0.nsteps == 1 + assert t0.dt == 0.0 + assert not t0.at_end + assert t0.index == 0 + assert no.time.isnever(t0.time) # type: ignore[call-overload] + assert no.time.isnever(t0.end) # type: ignore[call-overload] + + m = _TestModel2(0, 1, 1) + no.run(m) + assert m.timeline.at_end + assert m.timeline.index == 1 + assert m.timeline.time == 1.0 def test_timeline_validation() -> None: - - with pytest.raises(TypeError): - no.LinearTimeline(2020, 2020, []) # type: ignore[call-overload] - with pytest.raises(ValueError): - no.LinearTimeline(2020, 0.0) - with pytest.raises(ValueError): - no.LinearTimeline(2020, -1.0) - with pytest.raises(ValueError): - no.LinearTimeline(2020, 2019, 1) - with pytest.raises(ValueError): - no.LinearTimeline(2020, 2021, 0) - with pytest.raises(ValueError): - no.NumericTimeline([2021, 2020]) - with pytest.raises(ValueError): - no.NumericTimeline([2020]) - with pytest.raises(ValueError): - no.CalendarTimeline(date(2021, 1, 1), 0, "y") - with pytest.raises(ValueError): - no.CalendarTimeline(date(2021, 1, 1), 12, "n") - with pytest.raises(ValueError): - no.CalendarTimeline(date(2021, 1, 1), date(2020, 1, 1), 1, "m") - with pytest.raises(ValueError): - no.CalendarTimeline(date(2019, 1, 1), date(2020, 1, 1), 1, "w") - with pytest.raises(ValueError): - no.CalendarTimeline(date(2019, 1, 1), date(2020, 1, 1), 1, "q") - with pytest.raises(ValueError): - no.CalendarTimeline(date(2019, 1, 1), date(2020, 1, 1), 0, "m")# - - # NOTE: passing a -ve int leads to a *TypeError* (when casting to size_t is attempted) - with pytest.raises(TypeError): - no.CalendarTimeline(date(2019, 1, 1), date(2020, 1, 1), -1, "m") + with pytest.raises(TypeError): + no.LinearTimeline(2020, 2020, []) # type: ignore[call-overload] + with pytest.raises(ValueError): + no.LinearTimeline(2020, 0.0) + with pytest.raises(ValueError): + no.LinearTimeline(2020, -1.0) + with pytest.raises(ValueError): + no.LinearTimeline(2020, 2019, 1) + with pytest.raises(ValueError): + no.LinearTimeline(2020, 2021, 0) + with pytest.raises(ValueError): + no.NumericTimeline([2021, 2020]) + with pytest.raises(ValueError): + no.NumericTimeline([2020]) + with pytest.raises(ValueError): + no.CalendarTimeline(date(2021, 1, 1), 0, "y") + with pytest.raises(ValueError): + no.CalendarTimeline(date(2021, 1, 1), 12, "n") + with pytest.raises(ValueError): + no.CalendarTimeline(date(2021, 1, 1), date(2020, 1, 1), 1, "m") + with pytest.raises(ValueError): + no.CalendarTimeline(date(2019, 1, 1), date(2020, 1, 1), 1, "w") + with pytest.raises(ValueError): + no.CalendarTimeline(date(2019, 1, 1), date(2020, 1, 1), 1, "q") + with pytest.raises(ValueError): + no.CalendarTimeline(date(2019, 1, 1), date(2020, 1, 1), 0, "m") + + # NOTE: passing a -ve int leads to a *TypeError* (when casting to size_t is attempted) + with pytest.raises(TypeError): + no.CalendarTimeline(date(2019, 1, 1), date(2020, 1, 1), -1, "m") def test_linear_timeline() -> None: - # 40 years annual steps - m = _TestModel2(2011, 2051, 40) - assert m.timeline.time == 2011 - assert m.timeline.dt == 1.0 - assert m.timeline.index == 0 - assert m.timeline.end == 2051 + # 40 years annual steps + m = _TestModel2(2011, 2051, 40) + assert m.timeline.time == 2011 + assert 
m.timeline.dt == 1.0 + assert m.timeline.index == 0 + assert m.timeline.end == 2051 - no.run(m) - assert m.timeline.index == 40 - assert m.timeline.time == 2051 + no.run(m) + assert m.timeline.index == 40 + assert m.timeline.time == 2051 def test_numeric_timeline() -> None: - class NumericTimelineModel(no.Model): - def __init__(self, numerictimeline: no.Timeline) -> None: - super().__init__(numerictimeline, no.MonteCarlo.deterministic_identical_stream) - def step(self) -> None: - assert self.timeline.dt == 1/16 - assert self.timeline.time == self.timeline.index / 16 - - def finalise(self) -> None: - assert self.timeline.time == 1.0 - assert self.timeline.time == self.timeline.end - assert self.timeline.index == 16 - # 16 steps to avoid rounding errors - m = NumericTimelineModel(no.NumericTimeline(np.linspace(0.0, 1.0, 17).tolist())) - assert m.timeline.time == 0.0 - assert m.timeline.index == 0 - no.run(m) + class NumericTimelineModel(no.Model): + def __init__(self, numerictimeline: no.Timeline) -> None: + super().__init__( + numerictimeline, no.MonteCarlo.deterministic_identical_stream + ) + + def step(self) -> None: + assert self.timeline.dt == 1 / 16 + assert self.timeline.time == self.timeline.index / 16 + + def finalise(self) -> None: + assert self.timeline.time == 1.0 + assert self.timeline.time == self.timeline.end + assert self.timeline.index == 16 + + # 16 steps to avoid rounding errors + m = NumericTimelineModel(no.NumericTimeline(np.linspace(0.0, 1.0, 17).tolist())) + assert m.timeline.time == 0.0 + assert m.timeline.index == 0 + no.run(m) def test_calendar_timeline() -> None: - # monthly timesteps checking we don't overshoot in shorter months - dim = [31, 29, 31, 30, 31, 30] + # monthly timesteps checking we don't overshoot in shorter months + dim = [31, 29, 31, 30, 31, 30] - class CalendarModel(no.Model): - def __init__(self, calendartimeline: no.Timeline) -> None: - super().__init__(calendartimeline, no.MonteCarlo.deterministic_identical_stream) + class CalendarModel(no.Model): + def __init__(self, calendartimeline: no.Timeline) -> None: + super().__init__( + calendartimeline, no.MonteCarlo.deterministic_identical_stream + ) - def step(self) -> None: - assert cast(date, self.timeline.time).day == min(dim[self.timeline.index], d) + def step(self) -> None: + assert cast(date, self.timeline.time).day == min( + dim[self.timeline.index], d + ) - def finalise(self) -> None: - assert self.timeline.dt == 0.0 - assert self.timeline.time == self.timeline.end - assert self.timeline.index == 6 + def finalise(self) -> None: + assert self.timeline.dt == 0.0 + assert self.timeline.time == self.timeline.end + assert self.timeline.index == 6 - for d in range(1,32): - t = no.CalendarTimeline(date(2020, 1, d), date(2020, 7, d), 1, "m") + for d in range(1, 32): + t = no.CalendarTimeline(date(2020, 1, d), date(2020, 7, d), 1, "m") + + m = CalendarModel(t) + no.run(m) - m = CalendarModel(t) - no.run(m) def test_open_ended_timeline() -> None: + class OpenEndedModel(no.Model): + def __init__(self, timeline: no.Timeline) -> None: + super().__init__(timeline, no.MonteCarlo.deterministic_identical_stream) + self.i = 0 + + def step(self) -> None: + assert self.i == self.timeline.index + self.i += 1 + if self.i > 10: + self.halt() + + m = OpenEndedModel(no.LinearTimeline(0, 1)) + assert m.timeline.end == no.time.FAR_FUTURE + assert m.timeline.nsteps == -1 + assert m.timeline.dt == 1.0 + no.run(m) + assert m.i == 11 - class OpenEndedModel(no.Model): - def __init__(self, timeline: no.Timeline) -> None: - 
super().__init__(timeline, no.MonteCarlo.deterministic_identical_stream) - self.i = 0 + m = OpenEndedModel(no.CalendarTimeline(date(2020, 12, 17), 1, "d")) + assert m.timeline.end == no.time.FAR_FUTURE + assert m.timeline.nsteps == -1 + assert np.fabs(m.timeline.dt - 1.0 / 365.2475) < 1e-8 + no.run(m) + assert m.i == 11 + + m = OpenEndedModel(no.CalendarTimeline(date(2020, 12, 17), 1, "m")) + assert m.timeline.end == no.time.FAR_FUTURE + assert m.timeline.nsteps == -1 + assert np.fabs(m.timeline.dt - 31.0 / 365.2475) < 1e-8 + no.run(m) + assert m.i == 11 - def step(self) -> None: - assert self.i == self.timeline.index - self.i += 1 - if self.i > 10: self.halt() - - m = OpenEndedModel(no.LinearTimeline(0, 1)) - assert m.timeline.end == no.time.FAR_FUTURE - assert m.timeline.nsteps == -1 - assert m.timeline.dt == 1.0 - no.run(m) - assert m.i == 11 - - m = OpenEndedModel(no.CalendarTimeline(date(2020, 12, 17), 1, "d")) - assert m.timeline.end == no.time.FAR_FUTURE - assert m.timeline.nsteps == -1 - assert np.fabs(m.timeline.dt - 1.0/365.2475) < 1e-8 - no.run(m) - assert m.i == 11 - - m = OpenEndedModel(no.CalendarTimeline(date(2020, 12, 17), 1, "m")) - assert m.timeline.end == no.time.FAR_FUTURE - assert m.timeline.nsteps == -1 - assert np.fabs(m.timeline.dt - 31.0 / 365.2475) < 1e-8 - no.run(m) - assert m.i == 11 def test_model() -> None: - model = _TestModel() - no.run(model) - assert model.step_count == 10 + model = _TestModel() + no.run(model) + assert model.step_count == 10 + # check the timestepping is consistent across the different timeline implementations def test_consistency() -> None: + # need to wrap timeline in a model to do the stepping, which isnt directly accessible from python + class ConsistencyTest(no.Model): + def __init__(self, timeline: no.Timeline) -> None: + super().__init__(timeline, no.MonteCarlo.deterministic_identical_stream) - # need to wrap timeline in a model to do the stepping, which isnt directly accessible from python - class ConsistencyTest(no.Model): - def __init__(self, timeline: no.Timeline) -> None: - super().__init__(timeline, no.MonteCarlo.deterministic_identical_stream) + def step(self) -> None: + pass - def step(self) -> None: - pass + m = ConsistencyTest(no.NoTimeline()) + assert m.timeline.nsteps == 1 + no.run(m) + assert m.timeline.index == 1 - m = ConsistencyTest(no.NoTimeline()) - assert m.timeline.nsteps == 1 - no.run(m) - assert m.timeline.index == 1 + m = ConsistencyTest(no.LinearTimeline(2020, 2021, 12)) - m = ConsistencyTest(no.LinearTimeline(2020, 2021, 12)) + assert m.timeline.nsteps == 12 + no.run(m) + assert m.timeline.index == 12 + assert m.timeline.time == 2021 - assert m.timeline.nsteps == 12 - no.run(m) - assert m.timeline.index == 12 - assert m.timeline.time == 2021 + m = ConsistencyTest(no.NumericTimeline([2020 + i / 12 for i in range(13)])) + assert m.timeline.nsteps == 12 + no.run(m) + assert m.timeline.index == 12 + assert m.timeline.time == 2021 - m = ConsistencyTest(no.NumericTimeline([2020 + i/12 for i in range(13)])) - assert m.timeline.nsteps == 12 - no.run(m) - assert m.timeline.index == 12 - assert m.timeline.time == 2021 + s = date(2019, 10, 31) + e = date(2020, 10, 31) - s = date(2019, 10, 31) - e = date(2020, 10, 31) + m = ConsistencyTest(no.CalendarTimeline(s, e, 1, "m")) + assert cast(datetime, m.timeline.time).date() == s + assert m.timeline.nsteps == 12 + no.run(m) + assert cast(datetime, m.timeline.time).date() == e + assert m.timeline.index == 12 - m = ConsistencyTest(no.CalendarTimeline(s, e, 1, "m")) - assert 
cast(datetime, m.timeline.time).date() == s - assert m.timeline.nsteps == 12 - no.run(m) - assert cast(datetime, m.timeline.time).date() == e - assert m.timeline.index == 12 def test_resume() -> None: - t0 = 0.1 - n = 10 - m = _TestResume(t0, n) # unit timesteps + t0 = 0.1 + n = 10 + m = _TestResume(t0, n) # unit timesteps - t = t0 - while not m.timeline.at_end: - no.run(m) - t += 1 - assert m.timeline.time == t + t = t0 + while not m.timeline.at_end: + no.run(m) + t += 1 + assert m.timeline.time == t + + assert m.timeline.time == t0 + n - assert m.timeline.time == t0 + n # check that halt/finalise interaction works as expected def test_halt_finalise() -> None: + class HCModel(no.Model): + def __init__(self, timeline: no.Timeline, halt: bool = False) -> None: + super().__init__(timeline, no.MonteCarlo.deterministic_identical_stream) + self.do_halt = halt + self.finalise_called = False - class HCModel(no.Model): - def __init__(self, timeline: no.Timeline, halt: bool=False) -> None: - super().__init__(timeline, no.MonteCarlo.deterministic_identical_stream) - self.do_halt = halt - self.finalise_called = False + def step(self) -> None: + if self.do_halt: + self.halt() - def step(self) -> None: - if self.do_halt: - self.halt() + def finalise(self) -> None: + self.finalise_called = True - def finalise(self) -> None: - self.finalise_called = True - - m = HCModel(no.LinearTimeline(0,3,3)) - no.run(m) - assert m.finalise_called - - m = HCModel(no.LinearTimeline(0,3,3), True) - no.run(m) - assert not m.finalise_called - assert not m.timeline.at_end - assert m.timeline.index == 1 - no.run(m) - assert not m.finalise_called - assert not m.timeline.at_end - assert m.timeline.index == 2 - no.run(m) - assert m.finalise_called - assert m.timeline.at_end - assert m.timeline.index == 3 + m = HCModel(no.LinearTimeline(0, 3, 3)) + no.run(m) + assert m.finalise_called + + m = HCModel(no.LinearTimeline(0, 3, 3), True) + no.run(m) + assert not m.finalise_called + assert not m.timeline.at_end + assert m.timeline.index == 1 + # resume + no.run(m) + assert not m.finalise_called + assert not m.timeline.at_end + assert m.timeline.index == 2 + m.do_halt = False + no.run(m) + assert m.finalise_called + assert m.timeline.at_end + assert m.timeline.index == 3 + with pytest.raises(StopIteration): + no.run(m) diff --git a/untested/people_multi/people_multi.md b/untested/people_multi/people_multi.md index eb2df8de..f2ce5202 100644 --- a/untested/people_multi/people_multi.md +++ b/untested/people_multi/people_multi.md @@ -2,7 +2,7 @@ The above model has been modified to run in massively parallel mode using [MPI](https://en.wikipedia.org/wiki/Message_Passing_Interface), for the entire population of England & Wales (approx 56 million people as of 2011 census). The input data is not under source control due to its size, but the 348 input files (one per local authority) are divided roughly equally over the MPI processes. This particular example, with its simple in-out migration model, lends itself easily to parallel execution as no interprocess communication is required. Future development of this package will enable interprocess communication, for e.g. moving people from one region to another. -The microsimulation has been run on the ARC3[[2]](#references) cluster and took a little over 4 minutes on 48 cores to simulate the population over a 40 year period. +The microsimulation has been run on the ARC3[[2]](../references.md#2) cluster and took a little over 4 minutes on 48 cores to simulate the population over a 40 year period. 
See the [examples/people_multi](examples/people_multi) directory and the script [mpi_job.sh](mpi_job.sh)
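The file-partitioning scheme described above amounts to only a few lines of Python. A minimal sketch, assuming the 348 per-authority input files can be listed with a glob (the data path and filename pattern here are illustrative, not the actual layout):

    import glob

    import neworder as no

    # each rank takes every SIZE-th file, offset by its own RANK, spreading the
    # 348 local-authority input files roughly equally over the MPI processes
    all_files = sorted(glob.glob("data/lad_*.csv"))
    my_files = all_files[no.mpi.RANK :: no.mpi.SIZE]
    no.log("rank %d/%d: %d input files" % (no.mpi.RANK, no.mpi.SIZE, len(my_files)))

Since the simple in-out migration model needs no data from other regions, each process can then run its own subset to completion without any send/recv or gather calls.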