Skip to content

Commit

Permalink
Download data from db and unzip before proceeding
Browse files Browse the repository at this point in the history
  • Loading branch information
chraibi committed Sep 20, 2024
1 parent a3246fb commit 893d3e8
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 3 deletions.
10 changes: 7 additions & 3 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,21 @@

# from analysis.proximity_analysis import run_proximity_script,
from analysis.proximity_analysis import run_tab3, run_pair_distribution
from data.datafactory import init_session_state
from utils.helper import get_numbers_country, sorting_key
from datafactory import init_session_state
from utils.helper import get_numbers_country, sorting_key, download_and_extract_zip
from utils.ui import init_app_looks, init_page_config, init_sidebar
from visualization.show_data import run_tab1


if __name__ == "__main__":
setup_logging()
init_page_config()
init_app_looks()
msg = st.empty()
#############
# Call the function to download and extract the data
download_and_extract_zip()
#############
init_app_looks()
init_session_state(msg)
country, tab1, tab2, tab3, tab4 = init_sidebar()
files = st.session_state.config.files[country]
Expand Down
92 changes: 92 additions & 0 deletions utils/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import logging
import os
import zipfile

import shutil
from pathlib import Path
from typing import Any, List, Tuple, TypeAlias, Union
Expand All @@ -18,6 +20,12 @@
from visualization import plots as pl
import streamlit.components.v1 as components

# Download/extraction settings shared by the zip helper functions in this module.
# zip_url: remote location of the trajectories archive (served over plain HTTP).
zip_url = "http://ped.fz-juelich.de/data/experiments/external/singlefile_gender_culture/data/trajectories.zip"
# zip_local_path: where the downloaded archive is stored, relative to the
# current working directory.
zip_local_path = "data.zip"
# extracted_data_folder: directory the archive is extracted into; its existence
# is what download_and_extract_zip() uses to decide whether data is present.
extracted_data_folder = Path("data")


# A pair of floats (presumably x/y coordinates -- confirm against callers).
Point: TypeAlias = Tuple[float, float]
# Shorthand for Streamlit's DeltaGenerator type (going by the name, meant for
# annotating column containers -- verify against usage).
st_column: TypeAlias = st.delta_generator.DeltaGenerator

Expand Down Expand Up @@ -783,3 +791,87 @@ def sum_distances_between_agents_on_path(
# Choose the shorter distance
distance_sum = min(direct_distance_sum, loop_around_distance_sum)
return distance_sum, p1, p2


def download_zip_file(zip_url: str, zip_local_path: str) -> bool:
    """Download a zip archive to disk, showing a Streamlit progress bar.

    The download is skipped when ``zip_local_path`` already exists. Otherwise
    the data is streamed into a temporary ``<zip_local_path>.part`` file which
    is renamed to ``zip_local_path`` only after the transfer has finished, so
    an interrupted download is never mistaken for a complete archive on the
    next run.

    Args:
        zip_url: URL of the zip archive to fetch.
        zip_local_path: Destination path of the downloaded archive.

    Returns:
        True if the archive is available locally (already present or freshly
        downloaded), False if the download or the write to disk failed.
    """
    if os.path.exists(zip_local_path):
        logging.info(f"File already exists. Skipping download. > {zip_local_path}")
        st.success(f"File already exists at {zip_local_path}")
        return True

    # Stream into a side file first; only a complete download gets the final name.
    partial_path = f"{zip_local_path}.part"

    try:
        logging.info("Downloading trajectories from db...")
        st.info("Downloading trajectories from db...")

        # (connect, read) timeouts keep the app from hanging on a dead server.
        with requests.get(zip_url, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()  # Raise an HTTPError for 4xx/5xx responses

            # Total size from the headers; 0 means "unknown", so no progress updates.
            total_size = int(response.headers.get("content-length", 0))

            progress_bar = st.progress(0)
            downloaded_size = 0

            with open(partial_path, "wb") as file:
                # 1 MiB chunks: far fewer progress-bar updates than 1 KiB chunks.
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    if not chunk:  # Filter out keep-alive chunks
                        continue
                    file.write(chunk)
                    downloaded_size += len(chunk)

                    if total_size > 0:
                        # Clamp: content-length may be smaller than the decoded
                        # byte count, and st.progress rejects values above 1.0.
                        progress_bar.progress(min(downloaded_size / total_size, 1.0))

        # Atomically publish the finished download under its final name.
        os.replace(partial_path, zip_local_path)

        logging.info(f"Download complete. > {zip_local_path}")
        st.success(f"Download complete. File saved to {zip_local_path}")

    except (requests.exceptions.RequestException, OSError) as e:
        logging.error(f"Error downloading the file: {e}")
        st.error(f"Error downloading the file: {e}")
        # Best-effort removal of the truncated side file.
        try:
            os.remove(partial_path)
        except OSError:
            pass
        return False

    return True


def extract_zip_file() -> bool:
    """Extract the archive at ``zip_local_path`` into ``extracted_data_folder``.

    Both paths are read from the module-level settings. A corrupt archive is
    deleted after the failure is logged so that the next run downloads it
    again instead of skipping the download and failing on the same file.

    Returns:
        True if the archive was extracted, False if it is corrupt or any other
        error occurred during extraction.
    """
    try:
        logging.info(f"Extracting data... from {zip_local_path}")
        with zipfile.ZipFile(zip_local_path, "r") as zip_ref:
            zip_ref.extractall(extracted_data_folder)
        logging.info("Extraction complete.")
    except zipfile.BadZipFile as e:
        logging.error(f"The downloaded zip file is corrupt: {e}")
        # A corrupt archive can never be extracted; remove it (best effort)
        # so a later download attempt is not skipped as "already exists".
        try:
            os.remove(zip_local_path)
        except OSError:
            pass
        return False
    except Exception as e:
        # Deliberate catch-all: callers rely on the boolean result, not on
        # exceptions. Logged at ERROR level so the failure is visible.
        logging.error(f"An unexpected error occurred while extracting the zip file: {e}")
        return False
    return True


def download_and_extract_zip() -> None:
    """Download and extract the data archive unless the data folder exists.

    The existence of ``extracted_data_folder`` is the marker for "data is
    already available". The folder is therefore created only after the
    download has succeeded, and removed again if extraction fails, so that a
    failed attempt never leaves a folder behind that would make later runs
    skip the download.
    """
    if os.path.exists(extracted_data_folder):
        logging.info("Data already exists locally.")
        return

    if not download_zip_file(zip_url, zip_local_path):
        logging.error("Failed to download the zip file. Please check the URL or your connection.")
        return

    # Create the target directory only once there is something to extract.
    extracted_data_folder.mkdir(parents=True, exist_ok=True)

    if not extract_zip_file():
        logging.error("Failed to extract the zip file. Please check the file and try again.")
        # Remove incomplete data so the next run starts from scratch.
        shutil.rmtree(extracted_data_folder, ignore_errors=True)

0 comments on commit 893d3e8

Please sign in to comment.