Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ch/review assess flai #2

Merged
merged 4 commits into from
Aug 22, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions config/config_flai.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
assess_flai.py:
working_dir: C:\Users\gwena\Documents\STDL\2_En_cours\occupation-toitures\02_Data
working_dir: .
method: one-to-many # one-to-one or one-to-many
tiles: initial/LiDAR/Emprises tuiles Lidar 2019.shp
gt: processed\manual_GT\merged_occupied_surface.shp
detection: processed\tests\flai\2488500_1116500.shp
tiles: input/lidar_intensity/LiDAR/2019/Tuiles/Emprises tuiles Lidar 2019.shp
gt: input/GT/merged_occupied_surface.shp
detection: input/flai/2488500_1116500.shp
epsg: 2056
98 changes: 53 additions & 45 deletions scripts/assess_results/assess_flai.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
# Copyright (c) 2020 Republic and Canton of Geneva
#

import os, sys
import os
import sys
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you change to match your scripts? Becaus I do it in all mines, so I would have to change them all.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I (will) change it in my scripts. I try to follow the pep8 convention style for python script:
image

from loguru import logger
from tqdm import tqdm
from yaml import load, FullLoader
Expand All @@ -19,9 +20,9 @@

sys.path.insert(1, 'scripts')
import functions.fct_metrics as metrics
import functions.fct_misc as fct_misc
import functions.fct_misc as misc
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can do it as you prefer, either we always have a function alias named "x" or named "fct_x". I tend to think the 1st option is lighter but I have no strong opinion on that, just that everything must follow the same pattern.


logger=fct_misc.format_logger(logger)
logger = misc.format_logger(logger)

# Argument and parameter specification
logger.info(f"Using config_flai.yaml as config file.")
Expand All @@ -36,14 +37,15 @@

GT = cfg['gt']
DETECTION = cfg['detection']
EPSG = cfg['epsg']

TILE_NAME=os.path.basename(DETECTION).split('.')[0]
TILES=cfg['tiles']
TILE_NAME = os.path.basename(DETECTION).split('.')[0]
TILES = cfg['tiles']

os.chdir(WORKING_DIR)

OUTPUT_DIR='final/flai_metrics'
fct_misc.ensure_dir_exists(OUTPUT_DIR)
OUTPUT_DIR = 'final/flai_metrics'
misc.ensure_dir_exists(OUTPUT_DIR)

written_files = {}

Expand All @@ -53,11 +55,15 @@
gdf_detec = gpd.read_file(DETECTION)
gdf_detec['ID_DET'] = gdf_detec['FID']
gdf_detec = gdf_detec.rename(columns={"area": "area_DET"})
gdf_detec = gdf_detec.to_crs(EPSG)
logger.info(f"Read detection file: {gdf_detec.shape[0]} shapes")

gdf_gt = gpd.read_file(GT)
gdf_gt['ID_GT'] = gdf_gt['fid']
gdf_gt = gdf_gt.rename(columns={"area": "area_GT"})
gdf_gt = gdf_gt.to_crs(EPSG)

misc.test_crs(gdf_detec.crs, gdf_gt.crs)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To test the CRS you just set seems a bit over the top. I think we could lighten the code and not put line 66.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can remove line 66.


if gdf_gt['ID_GT'].isnull().values.any():
logger.error('Some labels have a null identifier.')
Expand All @@ -67,110 +73,112 @@
sys.exit(1)

if TILES:
tiles=gpd.read_file(TILES)
tile=tiles[tiles['fme_basena']==TILE_NAME]
gdf_gt=gdf_gt.overlay(tile)
tiles = gpd.read_file(TILES)
tile = tiles[tiles['fme_basena'] == TILE_NAME]
gdf_gt = gdf_gt.overlay(tile)

nbr_labels=gdf_gt.shape[0]
nbr_labels = gdf_gt.shape[0]
logger.info(f"Read GT file: {nbr_labels} shapes")


logger.info(f"Metrics computation")
if METHOD=='one-to-one':
if METHOD == 'one-to-one':
logger.info('Using the one-to-one method.')
elif METHOD=='one-to-many':
elif METHOD == 'one-to-many':
logger.info('Using one-to-many method.')
else:
logger.warning('Unknown method, defaulting to one-to-one.')
logger.warning('Unknown method, the default value is one-to-one.')

best_f1=0
for threshold in tqdm([i/100 for i in range(10 ,100, 5)], desc='Search for the best threshold on the IoU'):
best_f1 = 0
for threshold in tqdm([i / 100 for i in range(10, 100, 5)], desc='Search for the best threshold on the IoU'):

tp_gdf_loop, fp_gdf_loop, fn_gdf_loop = metrics.get_fractional_sets(gdf_detec, gdf_gt, iou_threshold=threshold, method=METHOD)

# Compute metrics
precision, recall, f1 = metrics.get_metrics(tp_gdf_loop, fp_gdf_loop, fn_gdf_loop)

if f1 > best_f1 or threshold==0:
tp_gdf=tp_gdf_loop
fp_gdf=fp_gdf_loop
fn_gdf=fn_gdf_loop
if f1 > best_f1 or threshold == 0:
tp_gdf = tp_gdf_loop
fp_gdf = fp_gdf_loop
fn_gdf = fn_gdf_loop

best_precision=precision
best_recall=recall
best_f1=f1
best_precision = precision
best_recall = recall
best_f1 = f1

best_threshold=threshold
best_threshold = threshold

logger.info(f'The best threshold for the IoU is {best_threshold} in regard to the F1 score.')
logger.info(f'The best threshold for the IoU is {best_threshold} for the F1 score.')


TP = tp_gdf.shape[0]
FP = fp_gdf.shape[0]
FN = fn_gdf.shape[0]

if METHOD=='one-to-many':
tp_with_duplicates=tp_gdf.copy()
dissolved_tp_gdf=tp_with_duplicates.dissolve(by=['ID_DET'], as_index=False)
if METHOD == 'one-to-many':
tp_with_duplicates = tp_gdf.copy()
dissolved_tp_gdf = tp_with_duplicates.dissolve(by=['ID_DET'], as_index=False)

geom1 = dissolved_tp_gdf.geometry.values.tolist()
geom2 = dissolved_tp_gdf['geom_GT'].values.tolist()
geom_DET = dissolved_tp_gdf.geometry.values.tolist()
geom_GT = dissolved_tp_gdf['geom_GT'].values.tolist()
iou = []
for (i, ii) in zip(geom1, geom2):
for (i, ii) in zip(geom_DET, geom_GT):
iou.append(metrics.intersection_over_union(i, ii))
dissolved_tp_gdf['IOU'] = iou

tp_gdf=dissolved_tp_gdf.copy()
tp_gdf = dissolved_tp_gdf.copy()

logger.info(f'{tp_with_duplicates.shape[0]-tp_gdf.shape[0]} labels are under a shared predictions with at least one other label.')
logger.info(f'{tp_with_duplicates.shape[0]-tp_gdf.shape[0]} labels share predictions with at least one other label.')

logger.info(f" TP = {TP}, FP = {FP}, FN = {FN}")
logger.info(f" precision = {best_precision:.2f}, recall = {best_recall:.2f}, f1 = {best_f1:.2f}")
logger.info(f" - Compute mean Jaccard index")
if TP!=0:

if TP != 0:
iou_average = tp_gdf['IOU'].mean()
logger.info(f" IOU average = {iou_average:.2f}")


nbr_tagged_labels = TP + FN
filename=os.path.join(OUTPUT_DIR, 'problematic_objects.gpkg')
filename = os.path.join(OUTPUT_DIR, 'problematic_objects.gpkg')

if os.path.exists(filename):
os.remove(filename)
if nbr_labels != nbr_tagged_labels:
logger.error(f'There are {nbr_labels} labels in input and {nbr_tagged_labels} labels in output.')
logger.info(f'The list of the problematic labels in exported to {filename}.')

if nbr_labels > nbr_tagged_labels:
tagged_labels=tp_gdf['ID_GT'].unique().tolist() + fn_gdf['ID_GT'].unique().tolist()
tagged_labels = tp_gdf['ID_GT'].unique().tolist() + fn_gdf['ID_GT'].unique().tolist()

untagged_gt_gdf=gdf_gt[~gdf_gt['ID_GT'].isin(tagged_labels)]
untagged_gt_gdf = gdf_gt[~gdf_gt['ID_GT'].isin(tagged_labels)]
untagged_gt_gdf.drop(columns=['geom_GT', 'OBSTACLE'], inplace=True)

layer_name='missing_label_tags'
layer_name = 'missing_label_tags'
untagged_gt_gdf.to_file(filename, layer=layer_name, index=False)

elif nbr_labels < nbr_tagged_labels:
all_tagged_labels_gdf=pd.concat([tp_gdf, fn_gdf])
all_tagged_labels_gdf = pd.concat([tp_gdf, fn_gdf])

duplicated_id_gt=all_tagged_labels_gdf.loc[all_tagged_labels_gdf.duplicated(subset=['ID_GT']), 'ID_GT'].unique().tolist()
duplicated_labels=all_tagged_labels_gdf[all_tagged_labels_gdf['ID_GT'].isin(duplicated_id_gt)]
duplicated_id_gt = all_tagged_labels_gdf.loc[all_tagged_labels_gdf.duplicated(subset=['ID_GT']), 'ID_GT'].unique().tolist()
duplicated_labels = all_tagged_labels_gdf[all_tagged_labels_gdf['ID_GT'].isin(duplicated_id_gt)]
duplicated_labels.drop(columns=['geom_GT', 'OBSTACLE', 'geom_DET', 'index_right', 'fid', 'FID', 'fme_basena'], inplace=True)

layer_name='duplicated_label_tags'
layer_name = 'duplicated_label_tags'
duplicated_labels.to_file(filename, layer=layer_name, index=False)

written_files[filename]=layer_name
written_files[filename] = layer_name


# Set the final dataframe with tagged prediction
logger.info(f"Set the final dataframe")

tagged_preds_gdf = pd.concat([tp_gdf, fp_gdf, fn_gdf])
tagged_preds_gdf = tagged_preds_gdf.drop(['index_right', 'geom_GT', 'FID', 'fid', 'OBSTACLE', 'geom_DET'], axis = 1)
tagged_preds_gdf = tagged_preds_gdf.drop(['index_right', 'geom_GT', 'FID', 'fid', 'geom_DET'], axis = 1)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here, I removed the 'OBSTACLE' attribute because my layer only have objects and no planes, so it is always equals to 1. If you want to keep it because it could have different values, you would need to check when reading the predictions that you only keep the obstacles and not the free spaces.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, for the image segmentation, I have only objects. so no need to keep it.


feature_path = os.path.join(OUTPUT_DIR, f'tagged_predictions.gpkg')
tagged_preds_gdf.to_file(feature_path, driver='GPKG', index=False, layer=TILE_NAME + '_tags')
written_files[feature_path]=TILE_NAME + '_tags'
written_files[feature_path] = TILE_NAME + '_tags'

print()
logger.success("The following files were written. Let's check them out!")
Expand Down
55 changes: 29 additions & 26 deletions scripts/functions/fct_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,18 @@
import pandas as pd


def intersection_over_union(pol1_xy, pol2_xy):
# Define each polygon
polygon1_shape = pol1_xy
polygon2_shape = pol2_xy
def intersection_over_union(polygon1_xy, polygon2_xy):
'''
Compute IoU value for a pair of shape.

# print(polygon1_shape, polygon2_shape)
- polygon1_xy, polygon2_xy: shape geometry
return: IoU of the 2 provided geometries
'''

# Calculate intersection and union, and tne IOU
polygon_intersection = polygon1_shape.intersection(polygon2_shape).area
polygon_union = polygon1_shape.area + polygon2_shape.area - polygon_intersection
polygon_intersection = polygon1_xy.intersection(polygon2_xy).area
polygon_union = polygon1_xy.area + polygon2_xy.area - polygon_intersection

return round(polygon_intersection / polygon_union, 3)


Expand All @@ -19,7 +22,7 @@ def apply_iou_threshold_one_to_one(tp_gdf_ini, threshold=0):
Apply the IoU threshold on the TP detection to only keep the ones with sufficient intersection over union.
Each detection can only correspond to one label.

- tp_gdf_ini: geodataframe of the potiential true positive detection
- tp_gdf_ini: geodataframe of the potential true positive detection
- threshold: threshold to apply on the IoU
return: the tp geodataframe and the geodataframe of the fp due to a low IoU
'''
Expand Down Expand Up @@ -47,20 +50,20 @@ def apply_iou_threshold_one_to_many(tp_gdf_ini, threshold=0):
'''

# Compare the global IOU of the detection based on all the matching labels
sum_detections_gdf=tp_gdf_ini.groupby(['ID_DET'])['IOU'].sum().reset_index()
true_detections_gdf=sum_detections_gdf[sum_detections_gdf['IOU']>threshold]
true_detections_index=true_detections_gdf['ID_DET'].unique().tolist()
sum_detections_gdf = tp_gdf_ini.groupby(['ID_DET'])['IOU'].sum().reset_index()
true_detections_gdf = sum_detections_gdf[sum_detections_gdf['IOU'] > threshold]
true_detections_index = true_detections_gdf['ID_DET'].unique().tolist()

# Check that the label is at least 25% under the prediction.
tp_gdf_ini['label_in_pred']=round(tp_gdf_ini['geom_GT'].intersection(tp_gdf_ini['geom_DET']).area/tp_gdf_ini['geom_GT'].area, 3)
tp_gdf_temp=tp_gdf_ini[(tp_gdf_ini['ID_DET'].isin(true_detections_index)) & (tp_gdf_ini['label_in_pred'] > 0.25)]
tp_gdf_ini['label_in_pred'] = round(tp_gdf_ini['geom_GT'].intersection(tp_gdf_ini['geom_DET']).area / tp_gdf_ini['geom_GT'].area, 3)
tp_gdf_temp = tp_gdf_ini[(tp_gdf_ini['ID_DET'].isin(true_detections_index)) & (tp_gdf_ini['label_in_pred'] > 0.25)]

# For each label, only keep the pred with the best IOU.
sorted_tp_gdf_temp = tp_gdf_temp.sort_values(by='IOU')
tp_gdf=sorted_tp_gdf_temp.drop_duplicates(['ID_GT'], keep='last', ignore_index=True)
id_det_tp=tp_gdf['ID_DET'].unique().tolist()
tp_gdf = sorted_tp_gdf_temp.drop_duplicates(['ID_GT'], keep='last', ignore_index=True)
id_det_tp = tp_gdf['ID_DET'].unique().tolist()

fp_gdf_temp=tp_gdf_ini[~tp_gdf_ini['ID_DET'].isin(id_det_tp)]
fp_gdf_temp = tp_gdf_ini[~tp_gdf_ini['ID_DET'].isin(id_det_tp)]
fp_gdf_temp = fp_gdf_temp.groupby(['ID_DET'], group_keys=False).apply(lambda g:g[g.IOU == g.IOU.max()])

return tp_gdf, fp_gdf_temp
Expand Down Expand Up @@ -98,14 +101,14 @@ def get_fractional_sets(preds_gdf, labels_gdf, iou_threshold=0.1, method='one-to
tp_gdf_temp = left_join[left_join.ID_GT.notnull()].copy()

# IOU computation between GT geometry and Detection geometry
geom1 = tp_gdf_temp['geom_DET'].to_numpy()
geom2 = tp_gdf_temp['geom_GT'].to_numpy()
geom_DET = tp_gdf_temp['geom_DET'].to_numpy()
geom_GT = tp_gdf_temp['geom_GT'].to_numpy()
iou = []
for (i, ii) in zip(geom1, geom2):
for (i, ii) in zip(geom_DET, geom_GT):
iou.append(intersection_over_union(i, ii))
tp_gdf_temp['IOU'] = iou

if method=='one-to-many':
if method == 'one-to-many':
tp_gdf, fp_gdf_temp = apply_iou_threshold_one_to_many(tp_gdf_temp, iou_threshold)
else:
tp_gdf, fp_gdf_temp = apply_iou_threshold_one_to_one(tp_gdf_temp, iou_threshold)
Expand All @@ -118,13 +121,13 @@ def get_fractional_sets(preds_gdf, labels_gdf, iou_threshold=0.1, method='one-to
# FALSE NEGATIVES -> objects that have been missed by the algorithm
right_join = gpd.sjoin(labels_gdf, preds_gdf, how='left', predicate='intersects', lsuffix='left', rsuffix='right')

id_gt_tp=tp_gdf['ID_GT'].unique().tolist()
suppressed_tp=tp_gdf_temp[~tp_gdf_temp['ID_GT'].isin(id_gt_tp)]
id_gt_tp = tp_gdf['ID_GT'].unique().tolist()
suppressed_tp = tp_gdf_temp[~tp_gdf_temp['ID_GT'].isin(id_gt_tp)]
id_gt_filter = suppressed_tp['ID_GT'].unique().tolist()

fn_too_low_hit_gdf = right_join[right_join['ID_GT'].isin(id_gt_filter)]
fn_no_hit_gdf = right_join[right_join.ID_DET.isna()].copy()
fn_gdf = pd.concat([fn_no_hit_gdf, fn_too_low_hit_gdf])
fn_low_overlap_gdf = right_join[right_join['ID_GT'].isin(id_gt_filter)]
fn_no_overlap_gdf = right_join[right_join.ID_DET.isna()].copy()
fn_gdf = pd.concat([fn_no_overlap_gdf, fn_low_overlap_gdf])

fn_gdf.drop_duplicates(subset=['ID_GT'], inplace=True)

Expand All @@ -148,6 +151,6 @@ def get_metrics(tp_gdf, fp_gdf, fn_gdf):

precision = TP / (TP + FP)
recall = TP / (TP + FN)
f1 = 2*precision*recall/(precision+recall)
f1 = 2*precision*recall / (precision+recall)

return precision, recall, f1
7 changes: 4 additions & 3 deletions scripts/functions/fct_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,22 @@ def format_logger(logger):
level="ERROR")
return logger

def test_crs(crs1, crs2 = "EPSG:2056"):
def test_crs(crs1, crs2="EPSG:2056"):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why change here to remove the spaces and below to add them?
You added them in the whole branch, you might as well leave them here.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here again I tried to follow the pep8 style convention:
image
image

'''
Take the crs of two dataframes and compare them. If they are not the same, stop the script.
'''
if isinstance(crs1, gpd.GeoDataFrame):
crs1=crs1.crs
crs1 = crs1.crs
if isinstance(crs2, gpd.GeoDataFrame):
crs2=crs2.crs
crs2 = crs2.crs

try:
assert(crs1 == crs2), f"CRS mismatch between the two files ({crs1} vs {crs2})."
except Exception as e:
print(e)
sys.exit(1)


def ensure_dir_exists(dirpath):
'''
Test if a directory exists. If not, make it.
Expand Down