-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Ch/review assess flai #2
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
assess_flai.py: | ||
working_dir: C:\Users\gwena\Documents\STDL\2_En_cours\occupation-toitures\02_Data | ||
working_dir: . | ||
method: one-to-many # one-to-one or one-to-many | ||
tiles: initial/LiDAR/Emprises tuiles Lidar 2019.shp | ||
gt: processed\manual_GT\merged_occupied_surface.shp | ||
detection: processed\tests\flai\2488500_1116500.shp | ||
tiles: input/lidar_intensity/LiDAR/2019/Tuiles/Emprises tuiles Lidar 2019.shp | ||
gt: input/GT/merged_occupied_surface.shp | ||
detection: input/flai/2488500_1116500.shp | ||
epsg: 2056 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,8 @@ | |
# Copyright (c) 2020 Republic and Canton of Geneva | ||
# | ||
|
||
import os, sys | ||
import os | ||
import sys | ||
from loguru import logger | ||
from tqdm import tqdm | ||
from yaml import load, FullLoader | ||
|
@@ -19,9 +20,9 @@ | |
|
||
sys.path.insert(1, 'scripts') | ||
import functions.fct_metrics as metrics | ||
import functions.fct_misc as fct_misc | ||
import functions.fct_misc as misc | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same as above. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We can do it as you prefer: either we always have a function alias named "x" or one named "fct_x". I tend to think the first option is lighter, but I have no strong opinion on that; the only requirement is that everything follows the same pattern. |
||
|
||
logger=fct_misc.format_logger(logger) | ||
logger = misc.format_logger(logger) | ||
|
||
# Argument and parameter specification | ||
logger.info(f"Using config_flai.yaml as config file.") | ||
|
@@ -36,14 +37,15 @@ | |
|
||
GT = cfg['gt'] | ||
DETECTION = cfg['detection'] | ||
EPSG = cfg['epsg'] | ||
|
||
TILE_NAME=os.path.basename(DETECTION).split('.')[0] | ||
TILES=cfg['tiles'] | ||
TILE_NAME = os.path.basename(DETECTION).split('.')[0] | ||
TILES = cfg['tiles'] | ||
|
||
os.chdir(WORKING_DIR) | ||
|
||
OUTPUT_DIR='final/flai_metrics' | ||
fct_misc.ensure_dir_exists(OUTPUT_DIR) | ||
OUTPUT_DIR = 'final/flai_metrics' | ||
misc.ensure_dir_exists(OUTPUT_DIR) | ||
|
||
written_files = {} | ||
|
||
|
@@ -53,11 +55,15 @@ | |
gdf_detec = gpd.read_file(DETECTION) | ||
gdf_detec['ID_DET'] = gdf_detec['FID'] | ||
gdf_detec = gdf_detec.rename(columns={"area": "area_DET"}) | ||
gdf_detec = gdf_detec.to_crs(EPSG) | ||
logger.info(f"Read detection file: {gdf_detec.shape[0]} shapes") | ||
|
||
gdf_gt = gpd.read_file(GT) | ||
gdf_gt['ID_GT'] = gdf_gt['fid'] | ||
gdf_gt = gdf_gt.rename(columns={"area": "area_GT"}) | ||
gdf_gt = gdf_gt.to_crs(EPSG) | ||
|
||
misc.test_crs(gdf_detec.crs, gdf_gt.crs) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Testing the CRS you just set seems a bit over the top. I think we could lighten the code and leave out line 66. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We can remove line 66. |
||
|
||
if gdf_gt['ID_GT'].isnull().values.any(): | ||
logger.error('Some labels have a null identifier.') | ||
|
@@ -67,110 +73,112 @@ | |
sys.exit(1) | ||
|
||
if TILES: | ||
tiles=gpd.read_file(TILES) | ||
tile=tiles[tiles['fme_basena']==TILE_NAME] | ||
gdf_gt=gdf_gt.overlay(tile) | ||
tiles = gpd.read_file(TILES) | ||
tile = tiles[tiles['fme_basena'] == TILE_NAME] | ||
gdf_gt = gdf_gt.overlay(tile) | ||
|
||
nbr_labels=gdf_gt.shape[0] | ||
nbr_labels = gdf_gt.shape[0] | ||
logger.info(f"Read GT file: {nbr_labels} shapes") | ||
|
||
|
||
logger.info(f"Metrics computation") | ||
if METHOD=='one-to-one': | ||
if METHOD == 'one-to-one': | ||
logger.info('Using the one-to-one method.') | ||
elif METHOD=='one-to-many': | ||
elif METHOD == 'one-to-many': | ||
logger.info('Using one-to-many method.') | ||
else: | ||
logger.warning('Unknown method, defaulting to one-to-one.') | ||
logger.warning('Unknown method, the default value is one-to-one.') | ||
|
||
best_f1=0 | ||
for threshold in tqdm([i/100 for i in range(10 ,100, 5)], desc='Search for the best threshold on the IoU'): | ||
best_f1 = 0 | ||
for threshold in tqdm([i / 100 for i in range(10, 100, 5)], desc='Search for the best threshold on the IoU'): | ||
|
||
tp_gdf_loop, fp_gdf_loop, fn_gdf_loop = metrics.get_fractional_sets(gdf_detec, gdf_gt, iou_threshold=threshold, method=METHOD) | ||
|
||
# Compute metrics | ||
precision, recall, f1 = metrics.get_metrics(tp_gdf_loop, fp_gdf_loop, fn_gdf_loop) | ||
|
||
if f1 > best_f1 or threshold==0: | ||
tp_gdf=tp_gdf_loop | ||
fp_gdf=fp_gdf_loop | ||
fn_gdf=fn_gdf_loop | ||
if f1 > best_f1 or threshold == 0: | ||
tp_gdf = tp_gdf_loop | ||
fp_gdf = fp_gdf_loop | ||
fn_gdf = fn_gdf_loop | ||
|
||
best_precision=precision | ||
best_recall=recall | ||
best_f1=f1 | ||
best_precision = precision | ||
best_recall = recall | ||
best_f1 = f1 | ||
|
||
best_threshold=threshold | ||
best_threshold = threshold | ||
|
||
logger.info(f'The best threshold for the IoU is {best_threshold} in regard to the F1 score.') | ||
logger.info(f'The best threshold for the IoU is {best_threshold} for the F1 score.') | ||
|
||
|
||
TP = tp_gdf.shape[0] | ||
FP = fp_gdf.shape[0] | ||
FN = fn_gdf.shape[0] | ||
|
||
if METHOD=='one-to-many': | ||
tp_with_duplicates=tp_gdf.copy() | ||
dissolved_tp_gdf=tp_with_duplicates.dissolve(by=['ID_DET'], as_index=False) | ||
if METHOD == 'one-to-many': | ||
tp_with_duplicates = tp_gdf.copy() | ||
dissolved_tp_gdf = tp_with_duplicates.dissolve(by=['ID_DET'], as_index=False) | ||
|
||
geom1 = dissolved_tp_gdf.geometry.values.tolist() | ||
geom2 = dissolved_tp_gdf['geom_GT'].values.tolist() | ||
geom_DET = dissolved_tp_gdf.geometry.values.tolist() | ||
geom_GT = dissolved_tp_gdf['geom_GT'].values.tolist() | ||
iou = [] | ||
for (i, ii) in zip(geom1, geom2): | ||
for (i, ii) in zip(geom_DET, geom_GT): | ||
iou.append(metrics.intersection_over_union(i, ii)) | ||
dissolved_tp_gdf['IOU'] = iou | ||
|
||
tp_gdf=dissolved_tp_gdf.copy() | ||
tp_gdf = dissolved_tp_gdf.copy() | ||
|
||
logger.info(f'{tp_with_duplicates.shape[0]-tp_gdf.shape[0]} labels are under a shared predictions with at least one other label.') | ||
logger.info(f'{tp_with_duplicates.shape[0]-tp_gdf.shape[0]} labels share predictions with at least one other label.') | ||
|
||
logger.info(f" TP = {TP}, FP = {FP}, FN = {FN}") | ||
logger.info(f" precision = {best_precision:.2f}, recall = {best_recall:.2f}, f1 = {best_f1:.2f}") | ||
logger.info(f" - Compute mean Jaccard index") | ||
if TP!=0: | ||
|
||
if TP != 0: | ||
iou_average = tp_gdf['IOU'].mean() | ||
logger.info(f" IOU average = {iou_average:.2f}") | ||
|
||
|
||
nbr_tagged_labels = TP + FN | ||
filename=os.path.join(OUTPUT_DIR, 'problematic_objects.gpkg') | ||
filename = os.path.join(OUTPUT_DIR, 'problematic_objects.gpkg') | ||
|
||
if os.path.exists(filename): | ||
os.remove(filename) | ||
if nbr_labels != nbr_tagged_labels: | ||
logger.error(f'There are {nbr_labels} labels in input and {nbr_tagged_labels} labels in output.') | ||
logger.info(f'The list of the problematic labels in exported to {filename}.') | ||
|
||
if nbr_labels > nbr_tagged_labels: | ||
tagged_labels=tp_gdf['ID_GT'].unique().tolist() + fn_gdf['ID_GT'].unique().tolist() | ||
tagged_labels = tp_gdf['ID_GT'].unique().tolist() + fn_gdf['ID_GT'].unique().tolist() | ||
|
||
untagged_gt_gdf=gdf_gt[~gdf_gt['ID_GT'].isin(tagged_labels)] | ||
untagged_gt_gdf = gdf_gt[~gdf_gt['ID_GT'].isin(tagged_labels)] | ||
untagged_gt_gdf.drop(columns=['geom_GT', 'OBSTACLE'], inplace=True) | ||
|
||
layer_name='missing_label_tags' | ||
layer_name = 'missing_label_tags' | ||
untagged_gt_gdf.to_file(filename, layer=layer_name, index=False) | ||
|
||
elif nbr_labels < nbr_tagged_labels: | ||
all_tagged_labels_gdf=pd.concat([tp_gdf, fn_gdf]) | ||
all_tagged_labels_gdf = pd.concat([tp_gdf, fn_gdf]) | ||
|
||
duplicated_id_gt=all_tagged_labels_gdf.loc[all_tagged_labels_gdf.duplicated(subset=['ID_GT']), 'ID_GT'].unique().tolist() | ||
duplicated_labels=all_tagged_labels_gdf[all_tagged_labels_gdf['ID_GT'].isin(duplicated_id_gt)] | ||
duplicated_id_gt = all_tagged_labels_gdf.loc[all_tagged_labels_gdf.duplicated(subset=['ID_GT']), 'ID_GT'].unique().tolist() | ||
duplicated_labels = all_tagged_labels_gdf[all_tagged_labels_gdf['ID_GT'].isin(duplicated_id_gt)] | ||
duplicated_labels.drop(columns=['geom_GT', 'OBSTACLE', 'geom_DET', 'index_right', 'fid', 'FID', 'fme_basena'], inplace=True) | ||
|
||
layer_name='duplicated_label_tags' | ||
layer_name = 'duplicated_label_tags' | ||
duplicated_labels.to_file(filename, layer=layer_name, index=False) | ||
|
||
written_files[filename]=layer_name | ||
written_files[filename] = layer_name | ||
|
||
|
||
# Set the final dataframe with tagged prediction | ||
logger.info(f"Set the final dataframe") | ||
|
||
tagged_preds_gdf = pd.concat([tp_gdf, fp_gdf, fn_gdf]) | ||
tagged_preds_gdf = tagged_preds_gdf.drop(['index_right', 'geom_GT', 'FID', 'fid', 'OBSTACLE', 'geom_DET'], axis = 1) | ||
tagged_preds_gdf = tagged_preds_gdf.drop(['index_right', 'geom_GT', 'FID', 'fid', 'geom_DET'], axis = 1) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Here, I removed the 'OBSTACLE' attribute because my layer only has objects and no planes, so it is always equal to 1. If you want to keep it because it could have different values, you would need to check when reading the predictions that you only keep the obstacles and not the free spaces. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK, for the image segmentation, I only have objects, so there is no need to keep it. |
||
|
||
feature_path = os.path.join(OUTPUT_DIR, f'tagged_predictions.gpkg') | ||
tagged_preds_gdf.to_file(feature_path, driver='GPKG', index=False, layer=TILE_NAME + '_tags') | ||
written_files[feature_path]=TILE_NAME + '_tags' | ||
written_files[feature_path] = TILE_NAME + '_tags' | ||
|
||
print() | ||
logger.success("The following files were written. Let's check them out!") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,21 +23,22 @@ def format_logger(logger): | |
level="ERROR") | ||
return logger | ||
|
||
def test_crs(crs1, crs2 = "EPSG:2056"): | ||
def test_crs(crs1, crs2="EPSG:2056"): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why change here to remove the spaces and below to add them? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
''' | ||
Take the crs of two dataframes and compare them. If they are not the same, stop the script. | ||
''' | ||
if isinstance(crs1, gpd.GeoDataFrame): | ||
crs1=crs1.crs | ||
crs1 = crs1.crs | ||
if isinstance(crs2, gpd.GeoDataFrame): | ||
crs2=crs2.crs | ||
crs2 = crs2.crs | ||
|
||
try: | ||
assert(crs1 == crs2), f"CRS mismatch between the two files ({crs1} vs {crs2})." | ||
except Exception as e: | ||
print(e) | ||
sys.exit(1) | ||
|
||
|
||
def ensure_dir_exists(dirpath): | ||
''' | ||
Test if a directory exists. If not, make it. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Did you change this to match your scripts? Because I do it in all of mine, so I would have to change them all.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I (will) change it in my scripts. I try to follow the PEP 8 style convention for Python scripts: