diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..7a4a16d --- /dev/null +++ b/Dockerfile @@ -0,0 +1,28 @@ +FROM nvidia/cuda:11.3.1-runtime-ubuntu20.04 + +RUN apt update &&\ + apt upgrade -y &&\ + apt install -y libgl1 &&\ + apt install -y libglib2.0-0 &&\ + apt install -y gdal-bin &&\ + apt install -y wget &&\ + apt install -y python3-pip &&\ + apt install -y python-is-python3 + +WORKDIR /app + +ADD requirements.txt . +RUN pip install -r requirements.txt --no-cache-dir + +ADD helpers/*.py helpers/ +ADD scripts/*.py scripts/ + +ADD setup.py . +RUN pip install . + +USER 65534:65534 + +ENV MPLCONFIGDIR /tmp + +ENTRYPOINT [""] +CMD ["stdl-objdet", "-h"] diff --git a/README.md b/README.md index cb8412a..9a47a2a 100644 --- a/README.md +++ b/README.md @@ -1,67 +1,133 @@ # Object Detector -This project provides a suite of Python scripts allowing the end-user to use Deep Learning to detect objects in georeferenced raster images. +This project provides a suite of Python scripts allowing the end-user to use Deep Learning to detect objects in geo-referenced raster images. -## Hardware requirements +### Table of contents -A CUDA-capable system is required. +- [Requirements](#requirements) + - [Hardware](#hardware) + - [Software](#software) +- [Installation](#installation) +- [How-to](#how-to) +- [Examples](#examples) +- [License](#license) -## Software Requirements +## Requirements -* Python 3.8 +### Hardware -* Dependencies may be installed with either `pip` or `conda`, by making use of the provided `requirements.txt` file. The following method was tested successfully on a Linux system powered by CUDA 11.3: +A CUDA-enabled GPU is required. - ```bash - $ conda create -n -c conda-forge python=3.8 - $ conda activate - $ pip install -r requirements.txt - ``` +### Software -## Scripts +* CUDA driver. This code was developed and tested with CUDA 11.3 on Ubuntu 20.04. -Four scripts can be found in the `scripts` subfolder: +* Although we recommend the usage of [Docker](https://www.docker.com/) (see [here](#with-docker)), this code can also be run without Docker, provided that Python 3.8 is available. Python dependencies may be installed with either `pip` or `conda`, using the provided `requirements.txt` file. We advise using a [Python virtual environment](https://docs.python.org/3/library/venv.html). -1. `generate_tilesets.py` -2. `train_model.py` -3. `make_predictions.py` -4. `assess_predictions.py` +## Installation -which can be run one after the other following this very order, by issuing the following command from a terminal: +### Without Docker + +The object detector can be installed by issuing the following command (see [this page](https://setuptools.pypa.io/en/latest/userguide/development_mode.html) for more information on the "editable installs"): + +```bash +$ pip install --editable . +``` + +In case of a successful installation, the command + +```bash +$ stdl-objdet -h +``` + +should display some basic usage information. + +### With Docker + +A Docker image can be built by issuing the following command: + +```bash +$ docker compose build +``` + +In case of a successful build, the command ```bash -$ python / -``` +$ docker compose run --rm stdl-objdet stdl-objdet -h +``` + +should display some basic usage information. Note that, for the code to run properly, + +1. the version of the CUDA driver installed on the host machine must match with the version used in the [Dockerfile](Dockerfile), namely version 11.3. We let end-user adapt the Dockerfile to her/his environment. +2. 
The NVIDIA Container Toolkit must be installed on the host machine (see [this guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)). + +## How-to + +### + +This project implements the workflow described [here](https://tech.stdl.ch/TASK-IDET/#workflow), which includes four stages: + +| Stage no. | Stage name | CLI command | Implementation | +| :-: | --- | --- | --- | +| 1 | Tileset generation | `generate_tilesets` | [here](scripts/generate_tilesets.py) | +| 2 | Model training | `train_model` | [here](scripts/train_model.py) | +| 3 | Detection | `make_detections` | [here](scripts/make_detections.py) | +| 4 | Assessment | `assess_detections` | [here](scripts/assess_detections.py) | + +These stages/scripts can be run one after the other, by issuing the following command from a terminal: + +* w/o Docker: + + ```bash + $ stdl-objdet + ``` + +* w/ Docker: -Note concerning **inference-only scenarios**: the execution of the `train_model.py` script can be skipped in case the user wishes to only perform inference, using a model trained in advance. + ```bash + $ docker compose run --rm -it stdl-objdet stdl-objdet -The same configuration file can be used for all the scripts, as each script only reads the content related to a key named after itself - further details on the configuration file will be provided here-below. Before terminating, each script prints the list of output files: we strongly encourage the end-user to review those files, *e.g.* by loading them into [QGIS](https://qgis.org). + ``` -The following terminology will be used throughout the rest of this document: + Alternatively, + + ```bash + $ docker compose run --rm -it stdl-objdet + ``` -* **ground-truth data**: data to be used to train the Deep Learning-based predictive model; such data is expected to be 100% true + then + + ``` + nobody@:/app# stdl-objdet + ``` + + For those who are less familiar with Docker, know that all output files created inside a container are not persistent, unless "volumes" or "bind mounts" are used (see [this](https://docs.docker.com/storage/)). + +The same configuration file can be used for all the commands, as each of them only reads the content related to a key named after its name. More detailed information about each stage and the related configuration is provided here-below. The following terminology is used: + +* **ground-truth data**: data to be used to train the Deep Learning-based detection model; such data is expected to be 100% true * **GT**: abbreviation of ground-truth * **other data**: data that is not ground-truth-grade -* **labels**: georeferenced polygons surrounding the objects targeted by a given analysis +* **labels**: geo-referenced polygons surrounding the objects targeted by a given analysis * **AoI**, abbreviation of "Area of Interest": geographical area over which the user intend to carry out the analysis. This area encompasses * regions for which ground-truth data is available, as well as * regions over which the user intends to detect potentially unknown objects -* **tiles**, or - more explicitly - "geographical map tiles": cf. [this link](https://wiki.openstreetmap.org/wiki/Tiles). More precisely, "Slippy Map Tiles" are used within this project, cf. [this link](https://developers.planet.com/tutorials/slippy-maps-101/). +* **tiles**, or - more explicitly - "geographical map tiles": see [this link](https://wiki.openstreetmap.org/wiki/Tiles). 
More precisely, "Slippy Map Tiles" are used within this project, see [this link](https://wiki.openstreetmap.org/wiki/Slippy_map_tilenames).
-* **COCO data format**: cf. [this link](https://cocodataset.org/#format-data)
+* **COCO data format**: see [this link](https://cocodataset.org/#format-data)
* **trn**, **val**, **tst**, **oth**: abbreviations of "training", "validation", "test" and "other", respectively

-### 1. `generate_tilesets.py`
+### Stage 1: tileset generation

-This script generates the various tilesets concerned by a given study. Each generated tileset is made up by:
+The `generate_tilesets` command generates the various tilesets concerned by a given study. Each generated tileset is made up of:

-* a collection of georeferenced raster images (in GeoTIFF format)
+* a collection of geo-referenced raster images (in GeoTIFF format)
* a JSON file compliant with the [COCO data format](https://cocodataset.org/#format-data)

The following relations apply:
@@ -82,19 +148,13 @@ In order to speed up some of the subsequent computations, each output image is a
  * bounding box;
  * spatial reference system.

-The script can be run by issuing the following command from a terminal:
-
-```bash
-$ python /generate_tilesets.py
-```
-
-Here's the excerpt of the configuration file relevant to this script, with values replaced by textual documentation:
+Here's the excerpt of the configuration file relevant to this script, with values replaced by some documentation:

```yaml
generate_tilesets.py:
  debug_mode:
  datasets:
-    aoi_tiles_geojson:
+    aoi_tiles_geojson:
    ground_truth_labels_geojson:
    other_labels_geojson:
    orthophotos_web_service:
@@ -107,14 +167,14 @@ generate_tilesets.py:
  overwrite:
  n_jobs:
  COCO_metadata:
-    year:
-    version:
-    description:
-    contributor:
-    url:
+    year:
+    version:
+    description:
+    contributor:
+    url:
    license:
-      name:
-      url:
+      name:
+      url:
    category:
      name:
      supercategory:
@@ -129,15 +189,12 @@ Note that:
2. the `id` field must not contain any duplicate value;
3. values of the `id` field must follow the following pattern: `(, , )`, e.g. `(135571, 92877, 18)`.

-### 2. `train_model.py`
-
-This script allows one to train a predictive model based on a Convolutional Deep Neural Network, leveraging [FAIR's Detectron2](https://github.com/facebookresearch/detectron2). For further information, we refer the user to the [official documention](https://detectron2.readthedocs.io/en/latest/).
+### Stage 2: model training

-The script can be run by issuing the following command from a terminal:
+> **Note**
+This stage can be skipped if the user wishes to perform inference only, using a pre-trained model.

-```bash
-$ python /train_model.py
-```
+The `train_model` command allows one to train a detection model based on a Convolutional Deep Neural Network, leveraging [FAIR's Detectron2](https://github.com/facebookresearch/detectron2). For further information, we refer the user to the [official documentation](https://detectron2.readthedocs.io/en/latest/).

Here's the excerpt of the configuration file relevant to this script, with values replaced by textual documentation:

```yaml
train_model.py:
  working_folder:
  log_subfolder:
  sample_tagged_img_subfolder:
  COCO_files:
    trn:
    val:
    tst:
  detectron2_config_file:
  model_weights:
    model_zoo_checkpoint_url:
```

-Detectron2 configuration files are provided in the example folders mentioned here-below. We warn the end-user about the fact that, **for the time being, no hyperparameters tuning is automatically performed by this suite of scripts**.
-
-### 3. `make_predictions.py`
+Detectron2 configuration files are provided in the example folders mentioned here-below.
We warn the end-user about the fact that, **for the time being, no hyperparameter tuning is automatically performed**.

-### 3. `make_predictions.py`
+### Stage 3: detection

-This script allows to use the predictive model trained at the previous step to make predictions over various input datasets:
+The `make_detections` command allows one to use the object detection model trained in the previous stage to make detections over various input datasets:

-* predictions over the `trn`, `val`, `tst` datasets can be used to assess the reliability of this approach on ground-truth data;
-* predictions over the `oth` dataset are, in principle, the main goal of this kind of analyses.

-The script can be run by issuing the following command from a terminal:
-
-```bash
-$ python /make_predictions.py
-```
+* detections over the `trn`, `val`, `tst` datasets can be used to assess the reliability of this approach on ground-truth data;
+* detections over the `oth` dataset are, in principle, the main goal of this kind of analysis.

Here's the excerpt of the configuration file relevant to this script, with values replaced by textual documentation:

```yaml
-make_predictions.py:
+make_detections.py:
  working_folder:
  log_subfolder:
  sample_tagged_img_subfolder:
@@ -185,52 +236,58 @@ make_predictions.py:
  detectron2_config_file:
  model_weights:
    pth_file:
+  image_metadata_json:
+  # the following section concerns the Ramer-Douglas-Peucker algorithm,
+  # which can be optionally applied to detections before they are exported
+  rdp_simplification:
+    enabled:
+    epsilon:
+  score_lower_threshold:
```

-### 4. `assess_predictions.py`
+### Stage 4: assessment

-This script allows one to assess the reliability of predictions made by the previous script, comparing predictions with ground-truth data. The assessment goes through the following steps:
+The `assess_detections` command allows one to assess the reliability of detections, comparing detections with ground-truth data. The assessment goes through the following steps:

-1. Labels (GT + `oth`) geometries are clipped to the boundaries of the various AoI tiles, scaled by a factor 0.999 in order to prevent any "crosstalk" between neighbouring tiles.
+1. Labels (GT + `oth`) geometries are clipped to the boundaries of the various AoI tiles, scaled by a factor 0.999 in order to prevent any "crosstalk" between neighboring tiles.

-2. Vector features are extracted from Detectron2's predictions, which are originally in a raster format (`numpy` arrays, to be more precise).
+2. Vector features are extracted from Detectron2's detections, which are originally in a raster format (`numpy` arrays, to be more precise).

-3. Spatial joins are computed between the vectorized predictions and the clipped labels, in order to identify
-   * True Positives (TP), *i.e.* objects that are found in both datasets, labels and predictions;
-   * False Positives (FP), *i.e.* objects that are only found in the predictions dataset;
+3. Spatial joins are computed between the vectorized detections and the clipped labels, in order to identify
+   * True Positives (TP), *i.e.* objects that are found in both datasets, labels and detections;
+   * False Positives (FP), *i.e.* objects that are only found in the detections dataset;
    * False Negatives (FN), *i.e.* objects that are only found in the labels dataset.

-4. Finally, TPs, FPs and FNs are counted in order to compute the following metrics (cf. [this page](https://en.wikipedia.org/wiki/Precision_and_recall)) :
+4.
Finally, TPs, FPs and FNs are counted in order to compute the following metrics (see [this page](https://en.wikipedia.org/wiki/Precision_and_recall)) : * precision * recall * f1-score -The script can be run by issuing the following command from a terminal: - -```bash -$ python /assess_predictions.py -``` - -Here's the excerpt of the configuration file relevant to this script, with values replaced by textual documentation: +Here's the excerpt of the configuration file relevant to this command, with values replaced by textual documentation: ```yaml -assess_predictions.py: - n_jobs: +assess_detections.py: datasets: ground_truth_labels_geojson: other_labels_geojson: - image_metadata_json: + image_metadata_json: split_aoi_tiles_geojson: - predictions: - trn: - val: - tst: - oth: - output_folder: + detections: + trn: + val: + tst: + oth: + output_folder: ``` ## Examples -A few examples are provided within the folder `examples`. For further details, we refer the user to the various use-case specific readme files: +A few examples are provided within the `examples` folder. For further details, we refer the user to the various use-case specific readme files: * [Swimming Pool Detection over the Canton of Geneva](examples/swimming-pool-detection/GE/README.md) * [Swimming Pool Detection over the Canton of Neuchâtel](examples/swimming-pool-detection/NE/README.md) +* [Quarry Detection over the entire Switzerland](examples/quarry-detection/README.md) + + +## License + +The STDL Object Detector is released under the [MIT license](LICENSE.md). diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..c97a576 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,15 @@ +version: "3" + +services: + stdl-objdet: + build: . + volumes: + - ./examples:/app/examples + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + command: /bin/bash \ No newline at end of file diff --git a/examples/quarry-detection/README.md b/examples/quarry-detection/README.md index 144d2e5..b6704a9 100644 --- a/examples/quarry-detection/README.md +++ b/examples/quarry-detection/README.md @@ -3,38 +3,42 @@ A sample working setup is provided here, enabling the end-user to detect quarries and mineral extraction sites in Switzerland over several years.
It consists of the following elements: -- the read-to-use configuration files: - - `config_trne.yaml`, - - `config_prd.yaml`, - - `detectron2_config_dqry.yaml`, -- the input data in the `data` subfolder: - - quarries shapefile from the product [swissTLM3D](https://www.swisstopo.admin.ch/fr/geodata/landscape/tlm3d.html), revised and synchronised with the 2020 [SWISSIMAGE](https://www.swisstopo.admin.ch/fr/geodata/images/ortho/swissimage10.html) mosaic (**label**), - - the delimitation of the AOI to perform inference predictions (**AOI**), - - the swiss DEM raster is too large to be saved on this platform but can be downloaded from this [link](https://github.com/lukasmartinelli/swissdem) using the EPSG:4326 - WGS 84 coordinate reference system. The raster must be first reprojected to EPSG:2056 - CH1903+ / LV95, named `switzerland_dem_EPSG2056.tif`and located in the **DEM** subfolder. -- a data preparation script (`prepare_data.py`) producing the files to be used as input to the `generate_tilesets.py`script. -- a results post-processing script (`filter_prediction.py`) filtering the predictions, produced from `make_prediction.py`script, to the final shapefile - -After creating and a new environment in python 3.8, the end-to-end workflow can be run by issuing the following list of commands, straight from this folder: - -```bash -$ sudo apt-get install -y python3-gdal gdal-bin libgdal-dev gcc g++ python3.8-dev -$ pip install -r ../../requirements.txt -$ python3 prepare_data.py config_trne.yaml -$ python3 ../../scripts/generate_tilesets.py config_trne.yaml -$ python3 ../../scripts/train_model.py config_trne.yaml -$ python3 ../../scripts/make_predictions.py config_trne.yaml -$ python3 ../../scripts/assess_predictions.py config_trne.yaml -$ python3 prepare_data.py config_prd.yaml -$ python3 ../../scripts/generate_tilesets.py config_prd.yaml -$ python3 ../../scripts/make_predictions.py config_prd.yaml -$ python3 filter_detection.py config_prd.yaml +- ready-to-use configuration files: + - `config_trne.yaml`; + - `config_prd.yaml`; + - `detectron2_config_dqry.yaml`. +- Input data in the `data` subfolder: + - quarry **labels** issued from the [swissTLM3D](https://www.swisstopo.admin.ch/fr/geodata/landscape/tlm3d.html) product, revised and synchronized with the 2020 [SWISSIMAGE](https://www.swisstopo.admin.ch/fr/geodata/images/ortho/swissimage10.html) orthophotos; + - the delimitation of the **Area of Interest (AoI)**; + - the Swiss DEM raster is too large to be saved on this platform but can be downloaded from this [link](https://github.com/lukasmartinelli/swissdem) using the [EPSG:4326](https://epsg.io/4326) coordinate reference system. The raster must be re-projected to [EPSG:2056](https://epsg.io/2056), renamed as `switzerland_dem_EPSG2056.tif` and located in the **DEM** subfolder. This procedure is managed by running the bash script `get_dem.sh`. +- A data preparation script (`prepare_data.py`) producing the files to be used as input to the `generate_tilesets` stage. +- A post-processing script (`filter_detections.py`) which filters detections according to their confidence score, altitude and area. The script also identifies and merges groups of nearby polygons. 
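+
+For reference, the thresholds used by this filtering step are exposed in the `filter_detections.py` section of `config_prd.yaml`; the values below simply mirror the provided configuration and are indicative rather than prescriptive:
+
+```yaml
+filter_detections.py:
+  elevation: 1200.0  # m, detections above this altitude are discarded
+  score: 0.95        # detection score (from 0 to 1) provided by detectron2
+  distance: 10       # m, buffer distance used to merge nearby polygons
+  area: 5000.0       # m2, polygons smaller than this area are discarded
+```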
+ +The workflow can be run end-to-end by issuing the following list of commands, from the root folder of this GitHub repository: + +``` +$ sudo chown -R 65534:65534 examples +$ docker compose run --rm -it stdl-objdet +nobody@:/app# cd examples/quarry-detection +nobody@:/app# python prepare_data.py config_trne.yaml +nobody@:/app# stdl-objdet generate_tilesets config_trne.yaml +nobody@:/app# stdl-objdet train_model config_trne.yaml +nobody@:/app# stdl-objdet make_detections config_trne.yaml +nobody@:/app# stdl-objdet assess_detections config_trne.yaml +nobody@:/app# python prepare_data.py config_prd.yaml +nobody@:/app# stdl-objdet generate_tilesets config_prd.yaml +nobody@:/app# stdl-objdet make_detections config_prd.yaml +nobody@:/app# bash get_dem.sh +nobody@:/app# python filter_detections.py config_prd.yaml +nobody@:/app# exit +$ sudo chmod -R a+w examples ``` We strongly encourage the end-user to review the provided `config_trne.yaml` and `config_prd.yaml` files as well as the various output files, a list of which is printed by each script before exiting. -The model is trained on the 2020 [SWISSIMAGE](https://www.swisstopo.admin.ch/fr/geodata/images/ortho/swissimage10.html) mosaic. Inference can be performed on SWISSIMAGE mosaics of the product SWISSIMAGE time travel by changing the year in `config_prd.yaml`. It should be noted that the model has been trained on RGB color images and might not perform as well on Black and White images. +The model is trained on the 2020 [SWISSIMAGE](https://www.swisstopo.admin.ch/fr/geodata/images/ortho/swissimage10.html) mosaic. Inference can be performed on SWISSIMAGE mosaics of the product [SWISSIMAGE time travel](https://map.geo.admin.ch/?lang=en&topic=swisstopo&bgLayer=ch.swisstopo.pixelkarte-farbe&zoom=0&layers_timestamp=2004,2004,&layers=ch.swisstopo.swissimage-product,ch.swisstopo.swissimage-product.metadata,ch.swisstopo.images-swissimage-dop10.metadata&E=2594025.91&N=1221065.68&layers_opacity=1,0.7,1&time=2004&layers_visibility=true,true,false) by changing the year in `config_prd.yaml`. It should be noted that the model has been trained on RGB images and might not perform as well on B&W images. -For more information about this project, you can consult [the associated repository](https://github.com/swiss-territorial-data-lab/proj-dqry) (not public yet). +For more information about this project, see [this repository](https://github.com/swiss-territorial-data-lab/proj-dqry) (not public yet). 
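+
+Regarding the year change mentioned above: in `config_prd.yaml`, the year notably appears in the XYZ tile URL and in the `filter_detections.py` section, as sketched below. The 2017 value is only an illustration and assumes that the corresponding SWISSIMAGE mosaic is actually served through the same XYZ web service:
+
+```yaml
+generate_tilesets.py:
+  datasets:
+    orthophotos_web_service:
+      type: XYZ
+      # the year is encoded in the XYZ URL (here 2017 instead of 2020)
+      url: https://wmts.geo.admin.ch/1.0.0/ch.swisstopo.swissimage-product/default/2017/3857/{z}/{x}/{y}.jpeg
+
+filter_detections.py:
+  year: 2017  # reported in the name of the final output file
+```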
## Disclaimer diff --git a/examples/quarry-detection/config_prd.yaml b/examples/quarry-detection/config_prd.yaml index eed3083..7232561 100644 --- a/examples/quarry-detection/config_prd.yaml +++ b/examples/quarry-detection/config_prd.yaml @@ -2,25 +2,25 @@ ####### Inference detection ####### # Automatic detection of Quarries and Mineral Extraction Sites (MES) in images -# 1-Produce tiles geometry according to the AOI extent and zoom level +# 1-Produce tiles geometry according to the AoI extent and zoom level prepare_data.py: srs: "EPSG:2056" # Projection of the input file datasets: - labels_shapefile: ./data/AOI/AOI_2020.shp - output_folder: ./output/output-prd + labels_shapefile: ./data/AoI/AoI_2020.shp + output_folder: ./output/output_prd zoom_level: 16 # 2-Fetch of tiles (online server) and split into 3 datasets: train, test, validation generate_tilesets.py: debug_mode: False # sample of tiles datasets: - aoi_tiles_geojson: ./output/output-prd/tiles.geojson + aoi_tiles_geojson: ./output/output_prd/tiles.geojson orthophotos_web_service: type: XYZ # supported values: 1. MIL = Map Image Layer 2. WMS 3. XYZ url: https://wmts.geo.admin.ch/1.0.0/ch.swisstopo.swissimage-product/default/2020/3857/{z}/{x}/{y}.jpeg - output_folder: ./output/output-prd + output_folder: ./output/output_prd tile_size: 256 # per side, in pixels - overwrite: True + overwrite: False n_jobs: 10 COCO_metadata: year: 2021 @@ -36,29 +36,29 @@ generate_tilesets.py: supercategory: "Land usage" # 3-Perform the object detection based on the optimized trained model -make_predictions.py: - working_folder: ./output/output-prd +make_detections.py: + working_folder: ./output/output_prd log_subfolder: logs sample_tagged_img_subfolder: sample_tagged_images COCO_files: # relative paths, w/ respect to the working_folder oth: COCO_oth.json detectron2_config_file: '../../detectron2_config_dqry.yaml' # path relative to the working_folder model_weights: - pth_file: '../output-trne/logs/model_0002999.pth' # trained model minimizing the validation loss curve, monitor the training process via tensorboard (tensorboard --logdir ) - image_metadata_json: './output/output-prd/img_metadata.json' + pth_file: '../output_trne/logs/model_0002999.pth' # trained model minimizing the validation loss curve, monitor the training process via tensorboard (tensorboard --logdir ) + image_metadata_json: './output/output_prd/img_metadata.json' rdp_simplification: # rdp = Ramer-Douglas-Peucker enabled: True epsilon: 2.0 # cf. 
https://rdp.readthedocs.io/en/latest/ score_lower_threshold: 0.3 -# 4-Filtering and merging prediction polygons to improve results -filter_prediction.py: +# 4-Filtering and merging detection polygons to improve results +filter_detections.py: year: 2020 - input: ./output/output-prd/oth_predictions_at_0dot3_threshold.gpkg - labels_shapefile: ./data/AOI/AOI_2020.shp + input: ./output/output_prd/oth_detections_at_0dot3_threshold.gpkg + labels_shapefile: ./data/AoI/AoI_2020.shp dem: ./data/DEM/switzerland_dem_EPSG2056.tif elevation: 1200.0 # m, altitude threshold - score: 0.95 # prediction score (from 0 to 1) provided by detectron2 + score: 0.95 # detection score (from 0 to 1) provided by detectron2 distance: 10 # m, distance use as a buffer to merge close polygons (likely to belong to the same object) together area: 5000.0 # m2, area threshold under which polygons are discarded - output: ./output/output-prd/oth_prediction_at_0dot3_threshold_year-{year}_score-{score}_area-{area}_elevation-{elevation}_distance-{distance}.geojson \ No newline at end of file + output: ./output/output_prd/oth_detections_at_0dot3_threshold_year-{year}_score-{score}_area-{area}_elevation-{elevation}_distance-{distance}.geojson \ No newline at end of file diff --git a/examples/quarry-detection/config_trne.yaml b/examples/quarry-detection/config_trne.yaml index e4bac49..f74356a 100644 --- a/examples/quarry-detection/config_trne.yaml +++ b/examples/quarry-detection/config_trne.yaml @@ -2,24 +2,24 @@ ####### Model training and evaluation ####### # Training of automatic detection of Quarries and Mineral Extraction Sites (MES) in images with a provided ground truth -# 1-Produce tiles geometry according to the AOI extent and zoom level +# 1-Produce tiles geometry according to the AoI extent and zoom level prepare_data.py: srs: "EPSG:2056" datasets: labels_shapefile: ./data/labels/tlm-hr-trn-topo.shp - output_folder: ./output/output-trne + output_folder: ./output/output_trne zoom_level: 16 # 2-Fetch of tiles (online server) and split into 3 datasets: train, test, validation generate_tilesets.py: debug_mode: False # sample of tiles datasets: - aoi_tiles_geojson: ./output/output-trne/tiles.geojson - ground_truth_labels_geojson: ./output/output-trne/labels.geojson + aoi_tiles_geojson: ./output/output_trne/tiles.geojson + ground_truth_labels_geojson: ./output/output_trne/labels.geojson orthophotos_web_service: type: XYZ # supported values: 1. MIL = Map Image Layer 2. WMS 3. 
XYZ url: https://wmts.geo.admin.ch/1.0.0/ch.swisstopo.swissimage-product/default/2020/3857/{z}/{x}/{y}.jpeg - output_folder: ./output/output-trne + output_folder: ./output/output_trne tile_size: 256 # per side, in pixels overwrite: False n_jobs: 10 @@ -38,7 +38,7 @@ generate_tilesets.py: # 3-Train the model with the detectron2 algorithm train_model.py: - working_folder: ./output/output-trne + working_folder: ./output/output_trne log_subfolder: logs sample_tagged_img_subfolder: sample_tagged_images COCO_files: # relative paths, w/ respect to the working_folder @@ -50,8 +50,8 @@ train_model.py: model_zoo_checkpoint_url: "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" # 4-Perform the object detection based on the optimized trained model -make_predictions.py: - working_folder: ./output/output-trne +make_detections.py: + working_folder: ./output/output_trne log_subfolder: logs sample_tagged_img_subfolder: sample_tagged_images COCO_files: # relative paths, w/ respect to the working_folder @@ -61,20 +61,20 @@ make_predictions.py: detectron2_config_file: '../../detectron2_config_dqry.yaml' # path relative to the working_folder model_weights: pth_file: './logs/model_0002999.pth' # trained model minimizing the validation loss curve, monitor the training process via tensorboard (tensorboard --logdir ) - image_metadata_json: './output/output-trne/img_metadata.json' + image_metadata_json: './output/output_trne/img_metadata.json' rdp_simplification: # rdp = Ramer-Douglas-Peucker enabled: true epsilon: 2.0 # cf. https://rdp.readthedocs.io/en/latest/ score_lower_threshold: 0.05 -# 5-Evaluate the quality of the prediction for the different datasets with metrics calculation -assess_predictions.py: +# 5-Evaluate the quality of the detections for the different datasets with metrics calculation +assess_detections.py: datasets: - ground_truth_labels_geojson: ./output/output-trne/labels.geojson - image_metadata_json: ./output/output-trne/img_metadata.json - split_aoi_tiles_geojson: ./output/output-trne/split_aoi_tiles.geojson # aoi = Area of Interest - predictions: - trn: ./output/output-trne/trn_predictions_at_0dot05_threshold.gpkg - val: ./output/output-trne/val_predictions_at_0dot05_threshold.gpkg - tst: ./output/output-trne/tst_predictions_at_0dot05_threshold.gpkg - output_folder: ./output/output-trne \ No newline at end of file + ground_truth_labels_geojson: ./output/output_trne/labels.geojson + image_metadata_json: ./output/output_trne/img_metadata.json + split_aoi_tiles_geojson: ./output/output_trne/split_aoi_tiles.geojson # aoi = Area of Interest + detections: + trn: ./output/output_trne/trn_detections_at_0dot05_threshold.gpkg + val: ./output/output_trne/val_detections_at_0dot05_threshold.gpkg + tst: ./output/output_trne/tst_detections_at_0dot05_threshold.gpkg + output_folder: ./output/output_trne \ No newline at end of file diff --git a/examples/quarry-detection/data/AoI/AoI_2020.cpg b/examples/quarry-detection/data/AoI/AoI_2020.cpg new file mode 100644 index 0000000..3ad133c --- /dev/null +++ b/examples/quarry-detection/data/AoI/AoI_2020.cpg @@ -0,0 +1 @@ +UTF-8 \ No newline at end of file diff --git a/examples/quarry-detection/data/AoI/AoI_2020.dbf b/examples/quarry-detection/data/AoI/AoI_2020.dbf new file mode 100644 index 0000000..56a141c Binary files /dev/null and b/examples/quarry-detection/data/AoI/AoI_2020.dbf differ diff --git a/examples/quarry-detection/data/AoI/AoI_2020.prj b/examples/quarry-detection/data/AoI/AoI_2020.prj new file mode 100644 index 0000000..55e14b1 --- /dev/null +++ 
b/examples/quarry-detection/data/AoI/AoI_2020.prj @@ -0,0 +1 @@ +PROJCS["CH1903+_LV95",GEOGCS["GCS_CH1903+",DATUM["D_CH1903+",SPHEROID["Bessel_1841",6377397.155,299.1528128]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Hotine_Oblique_Mercator_Azimuth_Center"],PARAMETER["False_Easting",2600000.0],PARAMETER["False_Northing",1200000.0],PARAMETER["Scale_Factor",1.0],PARAMETER["Azimuth",90.0],PARAMETER["Longitude_Of_Center",7.43958333333333],PARAMETER["Latitude_Of_Center",46.9524055555556],UNIT["Meter",1.0]] \ No newline at end of file diff --git a/examples/quarry-detection/data/AoI/AoI_2020.shp b/examples/quarry-detection/data/AoI/AoI_2020.shp new file mode 100644 index 0000000..5c98184 Binary files /dev/null and b/examples/quarry-detection/data/AoI/AoI_2020.shp differ diff --git a/examples/quarry-detection/data/AoI/AoI_2020.shx b/examples/quarry-detection/data/AoI/AoI_2020.shx new file mode 100644 index 0000000..fd1a929 Binary files /dev/null and b/examples/quarry-detection/data/AoI/AoI_2020.shx differ diff --git a/examples/quarry-detection/filter_prediction.py b/examples/quarry-detection/filter_detections.py similarity index 75% rename from examples/quarry-detection/filter_prediction.py rename to examples/quarry-detection/filter_detections.py index 3c297e5..f6be5d3 100644 --- a/examples/quarry-detection/filter_prediction.py +++ b/examples/quarry-detection/filter_detections.py @@ -6,23 +6,21 @@ import os import sys -import inspect import time import argparse import yaml -from loguru import logger import geopandas as gpd import pandas as pd import rasterio from sklearn.cluster import KMeans - -# the following allows us to import modules from within this file's parent folder sys.path.insert(0, '.') +from helpers import misc +from helpers.constants import DONE_MSG -logger.remove() -logger.add(sys.stderr, format="{time:YYYY-MM-DD HH:mm:ss} - {level} - {message}", level="INFO") +from loguru import logger +logger = misc.format_logger(logger) if __name__ == "__main__": @@ -31,7 +29,7 @@ tic = time.time() logger.info('Starting...') - # argument parser + # Argument and parameter specification parser = argparse.ArgumentParser(description="The script filters the detection of potential Mineral Extraction Sites obtained with the object-detector scripts") parser.add_argument('config_file', type=str, help='input geojson path') args = parser.parse_args() @@ -61,7 +59,7 @@ input = gpd.read_file(INPUT) input = input.to_crs(2056) total = len(input) - logger.info(f"Total input = {total}") + logger.info(f"{total} input shapes") # Discard polygons detected above the threshold elevalation and 0 m r = rasterio.open(DEM) @@ -75,9 +73,9 @@ input = input[input.elev != 0] te = len(input) - logger.info(f"{str(total - te)} predictions removed by elevation threshold: {str(ELEVATION)} m") + logger.info(f"{total - te} detections were removed by elevation threshold: {ELEVATION} m") - # Centroid of every prediction polygon + # Centroid of every detection polygon centroids = gpd.GeoDataFrame() centroids.geometry = input.representative_point() @@ -87,9 +85,9 @@ cluster = KMeans(n_clusters=k, algorithm='auto', random_state=1) model = cluster.fit(centroids) labels = model.predict(centroids) - logger.info(f"KMeans algorithm computed with k = {str(k)}") + logger.info(f"KMeans algorithm computed with k = {k}") - # Dissolve and Aggregate (keep the max value of aggregate attributes) + # Dissolve and aggregate (keep the max value of aggregate attributes) input['cluster'] = labels input = 
input.dissolve(by='cluster', aggfunc='max')
@@ -98,42 +96,42 @@
    # Filter dataframe by score value
    input = input[input['score'] > SCORE]
    sc = len(input)
-    logger.info(f"{str(total - sc)} predictions removed by score threshold: {str(SCORE)}")
+    logger.info(f"{total - sc} detections were removed by score threshold: {SCORE}")

-    # Clip prediction to AOI
+    # Clip detection to AoI
    input = gpd.clip(input, aoi)

-    # Create empty data frame
-    geo_merge = gpd.GeoDataFrame()
    # Merge close labels using buffer and unions
-    geo_merge = input.buffer(+DISTANCE, resolution = 2)
+    geo_merge = gpd.GeoDataFrame()
+    geo_merge = input.buffer(+DISTANCE, resolution=2)
    geo_merge = geo_merge.geometry.unary_union
-    geo_merge = gpd.GeoDataFrame(geometry=[geo_merge], crs = input.crs)
+    geo_merge = gpd.GeoDataFrame(geometry=[geo_merge], crs=input.crs)
    geo_merge = geo_merge.explode(index_parts=True).reset_index(drop=True)
    geo_merge = geo_merge.buffer(-DISTANCE, resolution=2)

    td = len(geo_merge)
-    logger.info(f"{str(sc - td)} difference to clustered predictions after union (distance {str(DISTANCE)})")
+    logger.info(f"{td} clustered detections remain after shape union (distance {DISTANCE})")

    # Discard polygons with area under the threshold
    geo_merge = geo_merge[geo_merge.area > AREA]
    ta = len(geo_merge)
-    logger.info(f"{str(td - ta)} difference to clustered predictions after union (distance {str(AREA)})")
+    logger.info(f"{td - ta} detections were removed by the area threshold: {AREA} m2")

    # Preparation of a geo df
    data = {'id': geo_merge.index,'area': geo_merge.area, 'centroid_x': geo_merge.centroid.x, 'centroid_y': geo_merge.centroid.y, 'geometry': geo_merge}
    geo_tmp = gpd.GeoDataFrame(data, crs=input.crs)

-    # Get the averaged prediction score of the merged polygons
+    # Get the averaged detection score of the merged polygons
    intersection = gpd.sjoin(geo_tmp, input, how='inner')
    intersection['id'] = intersection.index
-    score_final=intersection.groupby(['id']).mean(numeric_only=True)
+    score_final = intersection.groupby(['id']).mean(numeric_only=True)

+    # Formatting the final geo df
    data = {'id_feature': geo_merge.index,'score': score_final['score'] , 'area': geo_merge.area, 'centroid_x': geo_merge.centroid.x, 'centroid_y': geo_merge.centroid.y, 'geometry': geo_merge}
    geo_final = gpd.GeoDataFrame(data, crs=input.crs)
-    logger.info(f"{len(geo_final)} predictions remaining")
+    logger.info(f"{len(geo_final)} detections remaining after filtering")

-    # Format the ooutput name of the filtered prediction
+    # Formatting the output name of the filtered detection
    feature = OUTPUT.replace('{score}', str(SCORE)).replace('0.', '0dot') \
        .replace('{year}', str(int(YEAR)))\
        .replace('{area}', str(int(AREA)))\
@@ -142,7 +140,7 @@
    geo_final.to_file(feature, driver='GeoJSON')
    written_files.append(feature)
-    logger.info(f"...done. A file was written: {feature}")
+    logger.success(f"{DONE_MSG} A file was written: {feature}")

    logger.info("The following files were written.
Let's check them out!") for written_file in written_files: diff --git a/examples/quarry-detection/get_dem.sh b/examples/quarry-detection/get_dem.sh new file mode 100644 index 0000000..da7563c --- /dev/null +++ b/examples/quarry-detection/get_dem.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +mkdir -p ./data/DEM/ +wget https://github.com/lukasmartinelli/swissdem/releases/download/v1.0/switzerland_dem.tif -O ./data/DEM/switzerland_dem.tif +gdalwarp -t_srs "EPSG:2056" ./data/DEM/switzerland_dem.tif ./data/DEM/switzerland_dem_EPSG2056.tif +rm ./data/DEM/switzerland_dem.tif \ No newline at end of file diff --git a/examples/quarry-detection/prepare_data.py b/examples/quarry-detection/prepare_data.py index 2846511..05a64a3 100644 --- a/examples/quarry-detection/prepare_data.py +++ b/examples/quarry-detection/prepare_data.py @@ -10,18 +10,18 @@ import argparse import yaml import re -from loguru import logger import geopandas as gpd import morecantile import pandas as pd - -# the following allows us to import modules from within this file's parent folder sys.path.insert(0, '.') +from helpers import misc +from helpers.constants import DONE_MSG + +from loguru import logger +logger = misc.format_logger(logger) -logger.remove() -logger.add(sys.stderr, format="{time:YYYY-MM-DD HH:mm:ss} - {level} - {message}", level="INFO") def add_tile_id(row): @@ -56,8 +56,9 @@ def add_tile_id(row): if not os.path.exists(OUTPUT_DIR): os.makedirs(OUTPUT_DIR) - # Prepare the tiles written_files = [] + + # Prepare the tiles ## Convert datasets shapefiles into geojson format logger.info('Convert labels shapefile into GeoJSON format (EPSG:4326)...') @@ -71,9 +72,9 @@ def add_tile_id(row): label_filepath = os.path.join(OUTPUT_DIR, label_filename) labels_4326.to_file(label_filepath, driver='GeoJSON') written_files.append(label_filepath) - logger.info(f"...done. A file was written: {label_filepath}") + logger.success(f"{DONE_MSG} A file was written: {label_filepath}") - logger.info('Creating tiles for the Area of Interest (AOI)...') + logger.info('Creating tiles for the Area of Interest (AoI)...') # Grid definition tms = morecantile.tms.get("WebMercatorQuad") # epsg:3857 @@ -122,7 +123,7 @@ def add_tile_id(row): tile_filepath = os.path.join(OUTPUT_DIR, tile_filename) tiles_4326.to_file(tile_filepath, driver='GeoJSON') written_files.append(tile_filepath) - logger.info(f"...done. A file was written: {tile_filepath}") + logger.success(f"{DONE_MSG} A file was written: {tile_filepath}") print() logger.info("The following files were written. Let's check them out!") diff --git a/examples/swimming-pool-detection/GE/README.md b/examples/swimming-pool-detection/GE/README.md index b05cae3..38f71fb 100644 --- a/examples/swimming-pool-detection/GE/README.md +++ b/examples/swimming-pool-detection/GE/README.md @@ -1,25 +1,26 @@ - # Example: detecting swimming pools over the Canton of Geneva A sample working setup is here provided, allowing the end-user to detect swimming pools over the Canton of Geneva. It is made up by the following assets: -* ready-to-use configuration files, namely `config_GE.yaml` and `detectron2_config_GE.yaml`; -* supplementary data (`data/OK_z18_tile_IDs.csv`), *i.e.* a curated list of Slippy Map Tiles corresponding to zoom level 18, which seemed to include reliable "ground-truth data" when they were manually checked against [SITG's "Piscines" Open Dataset](https://ge.ch/sitg/fiche/1836), in Summer 2020. The thoughtful user should either review or regenerate this file in order to get better results. 
-* A data preparation script (`prepare_data.py`), producing files to be used as input to the `generate_training_sets.py` script. +* ready-to-use configuration files, namely `config_GE.yaml` and `detectron2_config_GE.yaml`. +* Supplementary data (`data/OK_z18_tile_IDs.csv`), *i.e.* a curated list of Slippy Map Tiles corresponding to zoom level 18, which seemed to include reliable "ground-truth data" when they were manually checked against the [SITG's "Piscines" Open Dataset](https://ge.ch/sitg/fiche/1836), in Summer 2020. The thoughtful user should either review or regenerate this file in order to get better results. +* A data preparation script (`prepare_data.py`), producing files to be used as input to the `generate_tilesets` stage. -The end-to-end workflow can be run by issuing the following list of commands, straight from this folder: +The workflow can be run end-to-end by issuing the following list of commands, from the root folder of this GitHub repository: -```bash -$ conda activate -$ python prepare_data.py config_GE.yaml -$ cd output_GE -$ cat parcels.geojson | supermercado burn 18 | mercantile shapes | fio collect > parcels_z18_tiles.geojson -$ cd - -$ python prepare_data.py config_GE.yaml -$ python ../../../scripts/generate_tilesets.py config_GE.yaml -$ python ../../../scripts/train_model.py config_GE.yaml -$ python ../../../scripts/make_predictions.py config_GE.yaml -$ python ../../../scripts/assess_predictions.py config_GE.yaml +``` +$ sudo chown -R 65534:65534 examples +$ docker compose run --rm -it stdl-objdet +nobody@:/app# cd examples/swimming-pool-detection/GE +nobody@:/app# python prepare_data.py config_GE.yaml +nobody@:/app# cd output_GE && cat parcels.geojson | supermercado burn 18 | mercantile shapes | fio collect > parcels_z18_tiles.geojson && cd - +nobody@:/app# python prepare_data.py config_GE.yaml +nobody@:/app# stdl-objdet generate_tilesets config_GE.yaml +nobody@:/app# stdl-objdet train_model config_GE.yaml +nobody@:/app# stdl-objdet make_detections config_GE.yaml +nobody@:/app# stdl-objdet assess_detections config_GE.yaml +nobody@:/app# exit +$ sudo chmod -R a+w examples ``` We strongly encourage the end-user to review the provided `config_GE.yaml` file as well as the various output files, a list of which is printed by each script before exiting. diff --git a/examples/swimming-pool-detection/GE/config_GE.yaml b/examples/swimming-pool-detection/GE/config_GE.yaml index 69364f1..d92dd6f 100644 --- a/examples/swimming-pool-detection/GE/config_GE.yaml +++ b/examples/swimming-pool-detection/GE/config_GE.yaml @@ -19,7 +19,7 @@ generate_tilesets.py: output_folder: output_GE tile_size: 256 # per side, in pixels overwrite: False - n_jobs: 1 + n_jobs: 10 COCO_metadata: year: 2020 version: 1.0 @@ -45,10 +45,10 @@ train_model.py: model_weights: model_zoo_checkpoint_url: "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" -make_predictions.py: +make_detections.py: working_folder: output_GE log_subfolder: logs - sample_tagged_img_subfolder: sample_prediction_images + sample_tagged_img_subfolder: sample_detection_images COCO_files: # relative paths, w/ respect to the working_folder trn: COCO_trn.json val: COCO_val.json @@ -63,15 +63,15 @@ make_predictions.py: epsilon: 0.5 # cf. 
https://rdp.readthedocs.io/en/latest/ score_lower_threshold: 0.05 -assess_predictions.py: +assess_detections.py: datasets: ground_truth_labels_geojson: output_GE/ground_truth_labels.geojson other_labels_geojson: output_GE/other_labels.geojson image_metadata_json: output_GE/img_metadata.json split_aoi_tiles_geojson: output_GE/split_aoi_tiles.geojson # aoi = Area of Interest - predictions: - trn: output_GE/trn_predictions_at_0dot05_threshold.gpkg - val: output_GE/val_predictions_at_0dot05_threshold.gpkg - tst: output_GE/tst_predictions_at_0dot05_threshold.gpkg - oth: output_GE/oth_predictions_at_0dot05_threshold.gpkg + detections: + trn: output_GE/trn_detections_at_0dot05_threshold.gpkg + val: output_GE/val_detections_at_0dot05_threshold.gpkg + tst: output_GE/tst_detections_at_0dot05_threshold.gpkg + oth: output_GE/oth_detections_at_0dot05_threshold.gpkg output_folder: output_GE diff --git a/examples/swimming-pool-detection/GE/prepare_data.py b/examples/swimming-pool-detection/GE/prepare_data.py index 3120978..7962c08 100644 --- a/examples/swimming-pool-detection/GE/prepare_data.py +++ b/examples/swimming-pool-detection/GE/prepare_data.py @@ -1,10 +1,11 @@ #!/bin/python # -*- coding: utf-8 -*- +import os +import sys import time import argparse import yaml -import os, sys import requests import geopandas as gpd import pandas as pd @@ -63,9 +64,9 @@ logger.success(f"...done. {len(dataset_dict[dataset])} records were found.") - # ------ Computing the Area of Interest (AOI) = cadastral parcels - Léman lake + # ------ Computing the Area of Interest (AoI) = cadastral parcels - Léman lake - logger.info("Computing the Area of Interest (AOI)...") + logger.info("Computing the Area of Interest (AoI)...") # N.B.: # it's faster to first compute Slippy Map Tiles (cf. https://developers.planet.com/tutorials/slippy-maps-101/), diff --git a/examples/swimming-pool-detection/NE/README.md b/examples/swimming-pool-detection/NE/README.md index bcde861..32972c6 100644 --- a/examples/swimming-pool-detection/NE/README.md +++ b/examples/swimming-pool-detection/NE/README.md @@ -1,29 +1,30 @@ - # Example: detecting swimming pools over the Canton of Neuchâtel A sample working setup is here provided, allowing the end-user to detect swimming pools over the Canton of Neuchâtel. It is made up by the following assets: -* ready-to-use configuration files, namely `config_NE.yaml` and `detectron2_config_NE.yaml`; -* supplementary data (`data/*`), *i.e.* +* ready-to-use configuration files, namely `config_NE.yaml` and `detectron2_config_NE.yaml`. +* Supplementary data (`data/*`), *i.e.* * geographical sectors covering ground-truth data; * other (non ground-truth) sectors; * ground-truth labels; * other labels. -* A data preparation script (`prepare_data.py`), producing files to be used as input to the `generate_training_sets.py` script. +* A data preparation script (`prepare_data.py`), producing files to be used as input to the `generate_tilesets` stage. 
-The end-to-end workflow can be run by issuing the following list of commands, straight from this folder: +The workflow can be run end-to-end by issuing the following list of commands, from the root folder of this GitHub repository: -```bash -$ conda activate -$ python prepare_data.py config_NE.yaml -$ cd output_NE -$ cat aoi.geojson | supermercado burn 18 | mercantile shapes | fio collect > aoi_z18_tiles.geojson -$ cd - -$ python prepare_data.py config_NE.yaml -$ python ../../../scripts/generate_tilesets.py config_NE.yaml -$ python ../../../scripts/train_model.py config_NE.yaml -$ python ../../../scripts/make_predictions.py config_NE.yaml -$ python ../../../scripts/assess_predictions.py config_NE.yaml +``` +$ sudo chown -R 65534:65534 examples +$ docker compose run --rm -it stdl-objdet +nobody@:/app# cd examples/swimming-pool-detection/NE +nobody@:/app# python prepare_data.py config_NE.yaml +nobody@:/app# cd output_NE && cat parcels.geojson | supermercado burn 18 | mercantile shapes | fio collect > parcels_z18_tiles.geojson && cd - +nobody@:/app# python prepare_data.py config_NE.yaml +nobody@:/app# stdl-objdet generate_tilesets config_NE.yaml +nobody@:/app# stdl-objdet train_model config_NE.yaml +nobody@:/app# stdl-objdet make_detections config_NE.yaml +nobody@:/app# stdl-objdet assess_detections config_NE.yaml +nobody@:/app# exit +$ sudo chmod -R a+w examples ``` -We strongly encourage the end-user to review the provided `config_GE.yaml` file as well as the various output files, a list of which is printed by each script before exiting. +We strongly encourage the end-user to review the provided `config_NE.yaml` file as well as the various output files, a list of which is printed by each script before exiting. diff --git a/examples/swimming-pool-detection/NE/config_NE.yaml b/examples/swimming-pool-detection/NE/config_NE.yaml index be56354..7b1c283 100644 --- a/examples/swimming-pool-detection/NE/config_NE.yaml +++ b/examples/swimming-pool-detection/NE/config_NE.yaml @@ -47,10 +47,10 @@ train_model.py: model_weights: model_zoo_checkpoint_url: "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" -make_predictions.py: +make_detections.py: working_folder: output_NE log_subfolder: logs - sample_tagged_img_subfolder: sample_prediction_images + sample_tagged_img_subfolder: sample_detection_images COCO_files: # relative paths, w/ respect to the working_folder trn: COCO_trn.json val: COCO_val.json @@ -65,15 +65,15 @@ make_predictions.py: epsilon: 0.5 # cf. 
https://rdp.readthedocs.io/en/latest/ score_lower_threshold: 0.05 -assess_predictions.py: +assess_detections.py: datasets: ground_truth_labels_geojson: output_NE/ground_truth_labels.geojson other_labels_geojson: output_NE/other_labels.geojson image_metadata_json: output_NE/img_metadata.json split_aoi_tiles_geojson: output_NE/split_aoi_tiles.geojson # aoi = Area of Interest - predictions: - trn: output_NE/trn_predictions_at_0dot05_threshold.gpkg - val: output_NE/val_predictions_at_0dot05_threshold.gpkg - tst: output_NE/tst_predictions_at_0dot05_threshold.gpkg - oth: output_NE/oth_predictions_at_0dot05_threshold.gpkg + detections: + trn: output_NE/trn_detections_at_0dot05_threshold.gpkg + val: output_NE/val_detections_at_0dot05_threshold.gpkg + tst: output_NE/tst_detections_at_0dot05_threshold.gpkg + oth: output_NE/oth_detections_at_0dot05_threshold.gpkg output_folder: output_NE \ No newline at end of file diff --git a/examples/swimming-pool-detection/NE/prepare_data.py b/examples/swimming-pool-detection/NE/prepare_data.py index bd725ef..9af9c71 100644 --- a/examples/swimming-pool-detection/NE/prepare_data.py +++ b/examples/swimming-pool-detection/NE/prepare_data.py @@ -1,10 +1,11 @@ #!/bin/python # -*- coding: utf-8 -*- +import os +import sys import time import argparse import yaml -import os, sys import geopandas as gpd import pandas as pd @@ -59,7 +60,7 @@ logger.success(f"...done. {len(dataset_dict[dataset])} records were found.") - # ------ Computing the Area of Interest (AOI) + # ------ Computing the Area of Interest (AoI) aoi_gdf = pd.concat([ dataset_dict['ground_truth_sectors'], diff --git a/helpers/COCO.py b/helpers/COCO.py index b7b65a3..948b853 100644 --- a/helpers/COCO.py +++ b/helpers/COCO.py @@ -1,7 +1,8 @@ #!/bin/python # -*- coding: utf-8 -*- -import os, sys +import os +import sys import json import numpy as np import logging diff --git a/helpers/MIL.py b/helpers/MIL.py index 3780223..66b0783 100644 --- a/helpers/MIL.py +++ b/helpers/MIL.py @@ -1,7 +1,8 @@ #!/bin/python # -*- coding: utf-8 -*- -import os, sys +import os +import sys import json import requests diff --git a/helpers/WMS.py b/helpers/WMS.py index a28f195..44b9baa 100644 --- a/helpers/WMS.py +++ b/helpers/WMS.py @@ -1,7 +1,8 @@ #!/bin/python # -*- coding: utf-8 -*- -import os, sys +import os +import sys import json import requests diff --git a/helpers/XYZ.py b/helpers/XYZ.py index 292d1a9..66d0c5e 100644 --- a/helpers/XYZ.py +++ b/helpers/XYZ.py @@ -1,7 +1,8 @@ #!/bin/python # -*- coding: utf-8 -*- -import os, sys +import os +import sys import json import requests diff --git a/helpers/detectron2.py b/helpers/detectron2.py index 0a654fd..42e76b0 100644 --- a/helpers/detectron2.py +++ b/helpers/detectron2.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # coding: utf-8 -import os, sys +import os import time import torch import numpy as np @@ -126,9 +126,9 @@ def build_hooks(self): # HELPER FUNCTIONS -def _preprocess(preds): +def _preprocess(dets): - fields = preds['instances'].get_fields() + fields = dets['instances'].get_fields() out = {} @@ -148,11 +148,11 @@ def _preprocess(preds): return out -def detectron2preds_to_features(preds, crs, transform, rdp_enabled, rdp_eps): +def detectron2dets_to_features(dets, crs, transform, rdp_enabled, rdp_eps): feats = [] - tmp = _preprocess(preds) + tmp = _preprocess(dets) for idx in range(len(tmp['scores'])): diff --git a/helpers/misc.py b/helpers/misc.py index 04274de..557fa4a 100644 --- a/helpers/misc.py +++ b/helpers/misc.py @@ -4,7 +4,8 @@ import warnings 
 warnings.simplefilter(action='ignore', category=FutureWarning)
 
-import os, sys
+import os
+import sys
 import geopandas as gpd
 from shapely.affinity import scale
@@ -109,25 +110,25 @@ def get_metrics(tp_gdf, fp_gdf, fn_gdf):
     return precision, recall, f1
 
 
-def get_fractional_sets(preds_gdf, labels_gdf):
+def get_fractional_sets(dets_gdf, labels_gdf):
 
-    _preds_gdf = preds_gdf.copy()
+    _dets_gdf = dets_gdf.copy()
     _labels_gdf = labels_gdf.copy()
 
     if len(_labels_gdf) == 0:
-        fp_gdf = _preds_gdf.copy()
+        fp_gdf = _dets_gdf.copy()
         tp_gdf = gpd.GeoDataFrame()
         fn_gdf = gpd.GeoDataFrame()
         return tp_gdf, fp_gdf, fn_gdf
 
-    assert(_preds_gdf.crs == _labels_gdf.crs), f"CRS Mismatch: predictions' CRS = {_preds_gdf.crs}, labels' CRS = {_labels_gdf.crs}"
+    assert(_dets_gdf.crs == _labels_gdf.crs), f"CRS Mismatch: detections' CRS = {_dets_gdf.crs}, labels' CRS = {_labels_gdf.crs}"
 
-    # we add a dummy column to the labels dataset, which should not exist in predictions too;
-    # this allows us to distinguish matching from non-matching predictions
+    # we add a dummy column to the labels dataset, which should not exist in detections too;
+    # this allows us to distinguish matching from non-matching detections
     _labels_gdf['dummy_id'] = _labels_gdf.index
 
     # TRUE POSITIVES
-    left_join = gpd.sjoin(_preds_gdf, _labels_gdf, how='left', predicate='intersects', lsuffix='left', rsuffix='right')
+    left_join = gpd.sjoin(_dets_gdf, _labels_gdf, how='left', predicate='intersects', lsuffix='left', rsuffix='right')
 
     tp_gdf = left_join[left_join.dummy_id.notnull()].copy()
     tp_gdf.drop_duplicates(subset=['dummy_id', 'tile_id'], inplace=True)
@@ -139,7 +140,7 @@ def get_fractional_sets(preds_gdf, labels_gdf):
     fp_gdf.drop(columns=['dummy_id'], inplace=True)
 
     # FALSE NEGATIVES
-    right_join = gpd.sjoin(_preds_gdf, _labels_gdf, how='right', predicate='intersects', lsuffix='left', rsuffix='right')
+    right_join = gpd.sjoin(_dets_gdf, _labels_gdf, how='right', predicate='intersects', lsuffix='left', rsuffix='right')
 
     fn_gdf = right_join[right_join.score.isna()].copy()
     fn_gdf.drop_duplicates(subset=['dummy_id', 'tile_id'], inplace=True)
diff --git a/scripts/assess_predictions.py b/scripts/assess_detections.py
similarity index 88%
rename from scripts/assess_predictions.py
rename to scripts/assess_detections.py
index 3df3e87..1590f08 100644
--- a/scripts/assess_predictions.py
+++ b/scripts/assess_detections.py
@@ -1,16 +1,18 @@
 #!/bin/python
 # -*- coding: utf-8 -*-
 
+import os
+import sys
 import time
 import argparse
 import yaml
-import os, sys
 import json
 
 import geopandas as gpd
 import pandas as pd
 import numpy as np
 import plotly.graph_objects as go
+
 from tqdm import tqdm
 
 # the following lines allow us to import modules from within this file's parent folder
 from inspect import getsourcefile
@@ -38,7 +40,7 @@ def main(cfg_file_path):
 
     OUTPUT_DIR = cfg['output_folder']
     IMG_METADATA_FILE = cfg['datasets']['image_metadata_json']
-    PREDICTION_FILES = cfg['datasets']['predictions']
+    DETECTION_FILES = cfg['datasets']['detections']
     SPLIT_AOI_TILES_GEOJSON = cfg['datasets']['split_aoi_tiles_geojson']
 
     if 'ground_truth_labels_geojson' in cfg['datasets'].keys():
@@ -112,21 +114,21 @@ def main(cfg_file_path):
     # let's extract filenames (w/o path)
     img_metadata_dict = {os.path.split(k)[-1]: v for (k, v) in tmp.items()}
 
-    # ------ Loading predictions
+    # ------ Loading detections
 
-    preds_gdf_dict = {}
+    dets_gdf_dict = {}
 
-    for dataset, preds_file in PREDICTION_FILES.items():
-        preds_gdf_dict[dataset] = gpd.read_file(preds_file)
+    for dataset, dets_file in DETECTION_FILES.items():
+        dets_gdf_dict[dataset] = gpd.read_file(dets_file)
 
     if len(labels_gdf)>0:
 
-        # ------ Comparing predictions with ground-truth data and computing metrics
+        # ------ Comparing detections with ground-truth data and computing metrics
 
         # init
         metrics = {}
-        for dataset in preds_gdf_dict.keys():
+        for dataset in dets_gdf_dict.keys():
            metrics[dataset] = []
 
        metrics_df_dict = {}
@@ -143,7 +145,7 @@
 
                 inner_tqdm_log.set_description_str(f'Threshold = {threshold:.2f}')
 
-                tmp_gdf = preds_gdf_dict[dataset].copy()
+                tmp_gdf = dets_gdf_dict[dataset].copy()
                 tmp_gdf.to_crs(epsg=clipped_labels_gdf.crs.to_epsg(), inplace=True)
                 tmp_gdf = tmp_gdf[tmp_gdf.score >= threshold].copy()
 
@@ -244,20 +246,20 @@
 
             written_files.append(file_to_write)
 
-        # ------ tagging predictions
+        # ------ tagging detections
 
         # we select the threshold which maximizes the f1-score on the val dataset
         selected_threshold = metrics_df_dict['val'].iloc[metrics_df_dict['val']['f1'].argmax()]['threshold']
 
-        logger.info(f"Tagging predictions with threshold = {selected_threshold:.2f}, which maximizes the f1-score on the val dataset.")
+        logger.info(f"Tagging detections with threshold = {selected_threshold:.2f}, which maximizes the f1-score on the val dataset.")
 
-        tagged_preds_gdf_dict = {}
+        tagged_dets_gdf_dict = {}
 
         # TRUE/FALSE POSITIVES, FALSE NEGATIVES
         for dataset in metrics.keys():
 
-            tmp_gdf = preds_gdf_dict[dataset].copy()
+            tmp_gdf = dets_gdf_dict[dataset].copy()
             tmp_gdf.to_crs(epsg=clipped_labels_gdf.crs.to_epsg(), inplace=True)
             tmp_gdf = tmp_gdf[tmp_gdf.score >= selected_threshold].copy()
@@ -269,16 +271,16 @@
             fn_gdf['tag'] = 'FN'
             fn_gdf['dataset'] = dataset
 
-            tagged_preds_gdf_dict[dataset] = pd.concat([tp_gdf, fp_gdf, fn_gdf])
+            tagged_dets_gdf_dict[dataset] = pd.concat([tp_gdf, fp_gdf, fn_gdf])
 
             precision, recall, f1 = misc.get_metrics(tp_gdf, fp_gdf, fn_gdf)
             logger.info(f'Dataset = {dataset} => precision = {precision:.3f}, recall = {recall:.3f}, f1 = {f1:.3f}')
 
-        tagged_preds_gdf = pd.concat([
-            tagged_preds_gdf_dict[x] for x in metrics.keys()
-        ])
+        tagged_dets_gdf = pd.concat([
+            tagged_dets_gdf_dict[x] for x in metrics.keys()
+        ])
 
-        file_to_write = os.path.join(OUTPUT_DIR, 'tagged_predictions.gpkg')
-        tagged_preds_gdf[['geometry', 'score', 'tag', 'dataset']].to_file(file_to_write, driver='GPKG', index=False)
+        file_to_write = os.path.join(OUTPUT_DIR, 'tagged_detections.gpkg')
+        tagged_dets_gdf[['geometry', 'score', 'tag', 'dataset']].to_file(file_to_write, driver='GPKG', index=False)
         written_files.append(file_to_write)
 
     # ------ wrap-up
@@ -298,7 +300,7 @@
 
 if __name__ == "__main__":
 
-    parser = argparse.ArgumentParser(description="This script assesses the quality of predictions with respect to ground-truth/other labels.")
+    parser = argparse.ArgumentParser(description="This script assesses the quality of detections with respect to ground-truth/other labels.")
     parser.add_argument('config_file', type=str, help='a YAML config file')
     args = parser.parse_args()
diff --git a/scripts/cli.py b/scripts/cli.py
index e132406..49fb63b 100644
--- a/scripts/cli.py
+++ b/scripts/cli.py
@@ -3,8 +3,8 @@ import argparse
 
 from scripts.generate_tilesets import main as generate_tilesets
 from scripts.train_model import main as train_model
-from scripts.make_predictions import main as make_predictions
-from scripts.assess_predictions import main as assess_predictions
+from scripts.make_detections import main as make_detections
+from scripts.assess_detections import main as assess_detections
 
 
 def main():
@@ -12,7 +12,7 @@ def main():
     global_parser = argparse.ArgumentParser(prog="stdl-objdet")
 
     subparsers = global_parser.add_subparsers(
-        title="tasks", help="the various tasks which can be performed by the STDL Object Detector"
+        title="stages", help="the various stages of the STDL Object Detector Framework"
     )
 
     arg_template = {
@@ -27,17 +27,17 @@ def main():
     add_parser.add_argument(**arg_template)
     add_parser.set_defaults(func=generate_tilesets)
 
-    add_parser = subparsers.add_parser("train_model", help="This script trains a predictive model.")
+    add_parser = subparsers.add_parser("train_model", help="This script trains an object detection model.")
     add_parser.add_argument(**arg_template)
     add_parser.set_defaults(func=train_model)
 
-    add_parser = subparsers.add_parser("make_predictions", help="This script makes predictions, using a previously trained model.")
+    add_parser = subparsers.add_parser("make_detections", help="This script makes detections, using a previously trained model.")
     add_parser.add_argument(**arg_template)
-    add_parser.set_defaults(func=make_predictions)
+    add_parser.set_defaults(func=make_detections)
 
-    add_parser = subparsers.add_parser("assess_predictions", help="This script assesses the quality of predictions with respect to ground-truth/other labels.")
+    add_parser = subparsers.add_parser("assess_detections", help="This script assesses the quality of detections with respect to ground-truth/other labels.")
     add_parser.add_argument(**arg_template)
-    add_parser.set_defaults(func=assess_predictions)
+    add_parser.set_defaults(func=assess_detections)
 
     # https://stackoverflow.com/a/47440202
     args = global_parser.parse_args(args=None if sys.argv[1:] else ['--help'])
diff --git a/scripts/generate_tilesets.py b/scripts/generate_tilesets.py
index cc62e9d..f90eeb0 100644
--- a/scripts/generate_tilesets.py
+++ b/scripts/generate_tilesets.py
@@ -4,10 +4,11 @@ import warnings
 
 warnings.simplefilter(action='ignore', category=FutureWarning)
 
+import os
+import sys
 import time
 import argparse
 import yaml
-import os, sys
 import geopandas as gpd
 import pandas as pd
 import json
@@ -334,7 +335,7 @@ def main(cfg_file_path):
 
             sys.exit(1)
 
-    # ------ Collecting image metadata, to be used when assessing predictions
+    # ------ Collecting image metadata, to be used when assessing detections
 
     logger.info("Collecting image metadata...")
 
diff --git a/scripts/make_predictions.py b/scripts/make_detections.py
similarity index 88%
rename from scripts/make_predictions.py
rename to scripts/make_detections.py
index 91d80ac..ae5780c 100644
--- a/scripts/make_predictions.py
+++ b/scripts/make_detections.py
@@ -4,11 +4,13 @@ import warnings
 
 warnings.simplefilter(action='ignore', category=UserWarning)
 
-import os, sys
+import os
+import sys
+import time
 import argparse
-import json, yaml
+import yaml
+import json
 import cv2
-import time
 
 import geopandas as gpd
 from tqdm import tqdm
@@ -29,7 +31,7 @@
 parent_dir = current_dir[:current_dir.rfind(os.path.sep)]
 sys.path.insert(0, parent_dir)
 
-from helpers.detectron2 import detectron2preds_to_features
+from helpers.detectron2 import detectron2dets_to_features
 from helpers.misc import image_metadata_to_affine_transform, format_logger
 from helpers.constants import DONE_MSG
 
@@ -103,15 +105,15 @@ def main(cfg_file_path):
 
     predictor = DefaultPredictor(cfg)
 
-    # ---- make predictions
+    # ---- make detections
 
     for dataset in COCO_FILES_DICT.keys():
 
         all_feats = []
         crs = None
 
-        logger.info(f"Making predictions over the entire {dataset} dataset...")
+        logger.info(f"Making detections over the entire {dataset} dataset...")
 
-        prediction_filename = f'{dataset}_predictions_at_{threshold_str}_threshold.gpkg'
+        detections_filename = f'{dataset}_detections_at_{threshold_str}_threshold.gpkg'
 
         for d in tqdm(DatasetCatalog.get(dataset)):
@@ -134,21 +136,21 @@
                 crs = _crs
 
             transform = image_metadata_to_affine_transform(im_md)
-            this_image_feats = detectron2preds_to_features(outputs, crs, transform, RDP_SIMPLIFICATION_ENABLED, RDP_SIMPLIFICATION_EPSILON)
+            this_image_feats = detectron2dets_to_features(outputs, crs, transform, RDP_SIMPLIFICATION_ENABLED, RDP_SIMPLIFICATION_EPSILON)
             all_feats += this_image_feats
 
         gdf = gpd.GeoDataFrame.from_features(all_feats)
         gdf['dataset'] = dataset
         gdf.crs = crs
 
-        gdf.to_file(prediction_filename, driver='GPKG', index=False)
-        written_files.append(os.path.join(WORKING_DIR, prediction_filename))
+        gdf.to_file(detections_filename, driver='GPKG', index=False)
+        written_files.append(os.path.join(WORKING_DIR, detections_filename))
         logger.success(DONE_MSG)
 
         logger.info("Let's tag some sample images...")
         for d in DatasetCatalog.get(dataset)[0:min(len(DatasetCatalog.get(dataset)), 10)]:
-            output_filename = f'{dataset}_pred_{d["file_name"].split("/")[-1]}'
+            output_filename = f'{dataset}_det_{d["file_name"].split("/")[-1]}'
             output_filename = output_filename.replace('tif', 'png')
             im = cv2.imread(d["file_name"])
             outputs = predictor(im)
@@ -180,10 +182,8 @@
 
 if __name__ == "__main__":
 
-    parser = argparse.ArgumentParser(description="This script makes predictions, using a previously trained model.")
+    parser = argparse.ArgumentParser(description="This script makes detections, using a previously trained model.")
     parser.add_argument('config_file', type=str, help='a YAML config file')
     args = parser.parse_args()
 
-    main(args.config_file)
-
-    
\ No newline at end of file
+    main(args.config_file)
\ No newline at end of file
diff --git a/scripts/train_model.py b/scripts/train_model.py
index 960d257..179c41d 100644
--- a/scripts/train_model.py
+++ b/scripts/train_model.py
@@ -1,11 +1,12 @@
 #!/usr/bin/env python
 # coding: utf-8
 
+import os
+import sys
+import time
 import argparse
 import yaml
-import os, sys
 import cv2
-import time
 
 from detectron2.utils.logger import setup_logger
 setup_logger()
@@ -128,12 +129,12 @@ def main(cfg_file_path):
     #inference_on_dataset(trainer.model, val_loader, evaluator)
 
     cfg.MODEL.WEIGHTS = TRAINED_MODEL_PTH_FILE
 
-    logger.info("Make some sample predictions over the test dataset...")
+    logger.info("Make some sample detections over the test dataset...")
 
    predictor = DefaultPredictor(cfg)
 
     for d in DatasetCatalog.get("tst_dataset")[0:min(len(DatasetCatalog.get("tst_dataset")), 10)]:
-        output_filename = "pred_" + d["file_name"].split('/')[-1]
+        output_filename = "det_" + d["file_name"].split('/')[-1]
         output_filename = output_filename.replace('tif', 'png')
         im = cv2.imread(d["file_name"])
         outputs = predictor(im)
@@ -166,7 +167,7 @@
 
 if __name__ == "__main__":
 
-    parser = argparse.ArgumentParser(description="This script trains a predictive model.")
+    parser = argparse.ArgumentParser(description="This script trains an object detection model.")
     parser.add_argument('config_file', type=str, help='a YAML config file')
     args = parser.parse_args()