Merge pull request #7 from swiss-territorial-data-lab/ac/rdp

Simplify the polygons associated with predictions using the Ramer-Douglas-Peucker algorithm
swiss-territorial-data-lab · Jan 27, 2023 · 91f00d0 · 91f00d0
2 parents af81652 + 90854d6
commit 91f00d0
Show file tree

Hide file tree

Showing 9 changed files with 110 additions and 157 deletions.
diff --git a/examples/swimming-pool-detection/GE/config_GE.yaml b/examples/swimming-pool-detection/GE/config_GE.yaml
@@ -57,6 +57,10 @@ make_predictions.py:
   detectron2_config_file: '../detectron2_config_GE.yaml' # path relative to the working_folder
   model_weights:
       pth_file: 'logs/model_final.pth'
+  image_metadata_json: output_GE/img_metadata.json
+  rdp_simplification: # rdp = Ramer-Douglas-Peucker
+    enabled: true
+    epsilon: 0.5 # RDP distance tolerance, in the units of the coordinates being simplified; cf. https://rdp.readthedocs.io/en/latest/
   score_lower_threshold: 0.05
 
 assess_predictions.py:
@@ -66,8 +70,8 @@ assess_predictions.py:
     image_metadata_json: output_GE/img_metadata.json
     split_aoi_tiles_geojson: output_GE/split_aoi_tiles.geojson # aoi = Area of Interest
     predictions:
-      trn: output_GE/trn_predictions_at_0dot05_threshold.pkl
-      val: output_GE/val_predictions_at_0dot05_threshold.pkl
-      tst: output_GE/tst_predictions_at_0dot05_threshold.pkl
-      oth: output_GE/oth_predictions_at_0dot05_threshold.pkl
+      trn: output_GE/trn_predictions_at_0dot05_threshold.gpkg
+      val: output_GE/val_predictions_at_0dot05_threshold.gpkg
+      tst: output_GE/tst_predictions_at_0dot05_threshold.gpkg
+      oth: output_GE/oth_predictions_at_0dot05_threshold.gpkg
   output_folder: output_GE
diff --git a/examples/swimming-pool-detection/NE/config_NE.yaml b/examples/swimming-pool-detection/NE/config_NE.yaml
@@ -46,7 +46,7 @@ train_model.py:
   detectron2_config_file: '../detectron2_config_NE.yaml' # path relative to the working_folder
   model_weights:
       model_zoo_checkpoint_url: "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml"
-
+        
 make_predictions.py:
   working_folder: output_NE
   log_subfolder: logs
@@ -59,6 +59,10 @@ make_predictions.py:
   detectron2_config_file: '../detectron2_config_NE.yaml' # path relative to the working_folder
   model_weights:
       pth_file: './logs/model_final.pth'
+  image_metadata_json: output_NE/img_metadata.json
+  rdp_simplification: # rdp = Ramer-Douglas-Peucker
+    enabled: true
+    epsilon: 0.5 # RDP distance tolerance, in the units of the coordinates being simplified; cf. https://rdp.readthedocs.io/en/latest/
   score_lower_threshold: 0.05
 
 assess_predictions.py:
@@ -68,8 +72,8 @@ assess_predictions.py:
     image_metadata_json: output_NE/img_metadata.json
     split_aoi_tiles_geojson: output_NE/split_aoi_tiles.geojson # aoi = Area of Interest
     predictions:
-      trn: output_NE/trn_predictions_at_0dot05_threshold.pkl
-      val: output_NE/val_predictions_at_0dot05_threshold.pkl
-      tst: output_NE/tst_predictions_at_0dot05_threshold.pkl
-      oth: output_NE/oth_predictions_at_0dot05_threshold.pkl
+      trn: output_NE/trn_predictions_at_0dot05_threshold.gpkg
+      val: output_NE/val_predictions_at_0dot05_threshold.gpkg
+      tst: output_NE/tst_predictions_at_0dot05_threshold.gpkg
+      oth: output_NE/oth_predictions_at_0dot05_threshold.gpkg
   output_folder: output_NE
diff --git a/helpers/detectron2.py b/helpers/detectron2.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # coding: utf-8
 
-import os
+import os, sys
 import time
 import torch
 import numpy as np
@@ -16,6 +16,11 @@
 from detectron2.utils import comm
 from detectron2.utils.logger import log_every_n_seconds
 
+from rasterio import features
+from shapely.affinity import affine_transform
+from shapely.geometry import box
+from rdp import rdp
+
 # cf. https://medium.com/@apofeniaco/training-on-detectron2-with-a-validation-set-and-plot-loss-on-it-to-avoid-overfitting-6449418fbf4e
 # cf. https://towardsdatascience.com/face-detection-on-custom-dataset-with-detectron2-and-pytorch-using-python-23c17e99e162
 # cf. http://cocodataset.org/#detection-eval
@@ -143,23 +148,50 @@ def _preprocess(preds):
   return out
 
 
-def dt2predictions_to_list(preds):
+def detectron2preds_to_features(preds, crs, transform, rdp_enabled, rdp_eps):
 
-  instances = []
+  feats = []
 
   tmp = _preprocess(preds)
 
   for idx in range(len(tmp['scores'])):
+
     instance = {}
     instance['score'] = tmp['scores'][idx]
     instance['pred_class'] = tmp['pred_classes'][idx]
 
     if 'pred_masks' in tmp.keys():
-      instance['pred_mask'] = tmp['pred_masks'][idx]
-
-    instance['pred_box'] = tmp['pred_boxes'][idx]
-
-    instances.append(instance)
 
-  return instances
+      pred_mask_int = tmp['pred_masks'][idx].astype(np.uint8)
+      _feats = [
+        {
+            'type': 'Feature', 
+            'properties': {'score': instance['score'], 'crs': crs}, 
+            'geometry': geom
+        } for (geom, v) in features.shapes(pred_mask_int, mask=None, transform=transform) if v == 1.0
+      ]
+
+      for f in _feats:
+        if rdp_enabled:
+          coords = f['geometry']['coordinates']
+          coords_after_rdp = [rdp(x, epsilon=rdp_eps) for x in coords]
+          f['geometry']['coordinates'] = coords_after_rdp
+
+        feats.append(f)
+
+    else: # pred_masks is absent (it depends on Detectron2's MASK_ON config param), so fall back to pred_boxes, which is expected to exist
+      instance['pred_box'] = tmp['pred_boxes'][idx]
+
+      geom = affine_transform(box(*instance['pred_box']), [transform.a, transform.b, transform.d, transform.e, transform.xoff, transform.yoff])
+      _feats = [
+          {
+              'type': 'Feature', 
+              'properties': {'score': instance['score'], 'crs': crs}, 
+              'geometry': geom
+          }
+      ]
+
+      feats += _feats
+
+  return feats
 
diff --git a/helpers/misc.py b/helpers/misc.py
@@ -36,75 +36,6 @@ def my_unpack(list_of_tuples):
     return [item for t in list_of_tuples for item in t]
 
 
-# cf. https://gis.stackexchange.com/questions/187877/how-to-polygonize-raster-to-shapely-polygons
-def predictions_to_features(predictions_dict, img_path):
-    """
-        predictions_dict = {"<image_filename>': [<prediction>]
-        <prediction> = {'score': ..., 'pred_class': ..., 'pred_mask': ..., 'pred_box': ...}
-    """
-
-    feats = []
-
-    for k, v in predictions_dict.items():
-        # N.B.: src images are only used for georeferencing (src.crs, src.transform)
-        with rasterio.open(os.path.join(img_path, k)) as src:
-
-            for pred in v:
-
-                pred_mask_int = pred['pred_mask'].astype(int)
-
-                feats += [{'type': 'Feature', 
-                            'properties': {'raster_val': v, 'score': pred['score'], 'crs': src.crs}, 
-                            'geometry': s
-                    } for (s, v) in features.shapes(pred_mask_int, mask=None, transform=src.transform)
-                ]
-
-    return feats
-
-
-def fast_predictions_to_features(predictions_dict, img_metadata_dict):
-    """
-        predictions_dict = {"<image_filename>': [<prediction>]
-        <prediction> = {'score': ..., 'pred_class': ..., 'pred_mask': ..., 'pred_box': ...}
-
-        img_metadata_dict's values includes the metadata issued by ArcGIS Server; keys are equal to filenames
-    """
-
-    feats = []
-
-    for k, v in predictions_dict.items():
-
-        # k is like "images/val-images-256/18_135617_92947.tif"
-        # img_metadata_dict keys are like "18_135617_92947.tif"
-
-        kk = k.split('/')[-1]
-        this_img_metadata = img_metadata_dict[kk]
-        #print(this_img_metadata)
-
-        crs = f"EPSG:{this_img_metadata['extent']['spatialReference']['latestWkid']}"
-        transform = image_metadata_to_affine_transform(this_img_metadata)
-        #print(transform)
-        for pred in v:
-            #print(pred)
-            if 'pred_mask' in pred.keys():
-
-                pred_mask_int = pred['pred_mask'].astype(np.uint8)
-                feats += [{'type': 'Feature', 
-                            'properties': {'raster_val': v, 'score': pred['score'], 'crs': crs}, 
-                            'geometry': s
-                    } for (s, v) in features.shapes(pred_mask_int, mask=None, transform=transform)
-                ]
-
-            else:
-
-                geom = affine_transform(box(*pred['pred_box']), [transform.a, transform.b, transform.d, transform.e, transform.xoff, transform.yoff])
-                feats += [{'type': 'Feature', 
-                            'properties': {'raster_val': 1.0, 'score': pred['score'], 'crs': crs}, 
-                            'geometry': geom}]
-
-    return feats
-
-
 def img_md_record_to_tile_id(img_md_record):
 
         filename = os.path.split(img_md_record.img_file)[-1]
@@ -291,21 +222,4 @@ def image_metadata_to_world_file(image_metadata):
     c += a/2.0 # <- IMPORTANT
     f += e/2.0 # <- IMPORTANT
 
-    return "\n".join([str(a), str(d), str(b), str(e), str(c), str(f)+"\n"])
-
-
-def image_metadata_to_affine_transform(image_metadata):
-    """
-    This uses rasterio.
-    """
-
-    xmin = image_metadata['extent']['xmin']
-    xmax = image_metadata['extent']['xmax']
-    ymin = image_metadata['extent']['ymin']
-    ymax = image_metadata['extent']['ymax']
-    width  = image_metadata['width']
-    height = image_metadata['height']
-
-    affine = from_bounds(xmin, ymin, xmax, ymax, width, height)
-
-    return affine
+    return "\n".join([str(a), str(d), str(b), str(e), str(c), str(f)+"\n"])
diff --git a/requirements.in b/requirements.in
@@ -18,3 +18,4 @@ torch @ https://download.pytorch.org/whl/cu113/torch-1.10.2%2Bcu113-cp38-cp38-li
 torchvision @ https://download.pytorch.org/whl/cu113/torchvision-0.11.3%2Bcu113-cp38-cp38-linux_x86_64.whl
 detectron2 @ https://dl.fbaipublicfiles.com/detectron2/wheels/cu113/torch1.10/detectron2-0.6%2Bcu113-cp38-cp38-linux_x86_64.whl
 plotly
+rdp
diff --git a/requirements.txt b/requirements.txt
@@ -118,6 +118,7 @@ numpy==1.23.3
     #   pandas
     #   pycocotools
     #   rasterio
+    #   rdp
     #   snuggs
     #   supermercado
     #   tensorboard
@@ -186,6 +187,8 @@ rasterio==1.3.2
     # via
     #   -r requirements.in
     #   supermercado
+rdp==0.8
+    # via -r requirements.in
 regex==2022.9.13
     # via black
 requests==2.28.1

diff --git a/scripts/assess_predictions.py b/scripts/assess_predictions.py
@@ -122,47 +122,19 @@
 
     # ------ Loading predictions
 
-    preds_dict = {}
-
-    for dataset, preds_file in PREDICTION_FILES.items():
-        with open(preds_file, 'rb') as fp:
-            preds_dict[dataset] = pickle.load(fp)
-
-    # ------ Extracting vector features out of predictions
-
     preds_gdf_dict = {}
-
-    logger.info(f'Extracting vector features...')
-    tic = time.time()
-    tqdm_log = tqdm(total=len(preds_dict.keys()), position=0)
-
-    for dataset, preds in preds_dict.items():
 
-        tqdm_log.set_description_str(f'Current dataset: {dataset}')
-
-        features = misc.fast_predictions_to_features(preds, img_metadata_dict=img_metadata_dict)
-        gdf = gpd.GeoDataFrame.from_features(features)
-        gdf['dataset'] = dataset
-        gdf.crs = features[0]['properties']['crs']
-
-        preds_gdf_dict[dataset] = gdf[gdf.raster_val == 1.0][['geometry', 'score', 'dataset']]
-
-        file_to_write = os.path.join(OUTPUT_DIR, f"{dataset}_predictions.geojson")
-        preds_gdf_dict[dataset].to_crs(epsg=4326).to_file(file_to_write, driver='GeoJSON', index=False)
-        written_files.append(file_to_write)
-
-        tqdm_log.update(1)
+    for dataset, preds_file in PREDICTION_FILES.items():
+        preds_gdf_dict[dataset] = gpd.read_file(preds_file)
 
-    tqdm_log.close()
-    logger.info(f'...done. Elapsed time = {(time.time()-tic):.2f} seconds.')
 
     if len(labels_gdf)>0:
 
         # ------ Comparing predictions with ground-truth data and computing metrics
 
         # init
         metrics = {}
-        for dataset in preds_dict.keys():
+        for dataset in preds_gdf_dict.keys():
             metrics[dataset] = []
 
         metrics_df_dict = {}
@@ -280,8 +252,6 @@
             written_files.append(file_to_write)
 
 
-
-
         # ------ tagging predictions
 
         # we select the threshold which maximizes the f1-score on the val dataset
@@ -315,8 +285,8 @@
             tagged_preds_gdf_dict[x] for x in metrics.keys()
         ])
 
-        file_to_write = os.path.join(OUTPUT_DIR, f'tagged_predictions.geojson')
-        tagged_preds_gdf[['geometry', 'score', 'tag', 'dataset']].to_crs(epsg=4326).to_file(file_to_write, driver='GeoJSON', index=False)
+        file_to_write = os.path.join(OUTPUT_DIR, f'tagged_predictions.gpkg')
+        tagged_preds_gdf[['geometry', 'score', 'tag', 'dataset']].to_file(file_to_write, driver='GPKG', index=False)
         written_files.append(file_to_write)
 
     # ------ wrap-up