Skip to content

Commit

Permalink
Synced codebase with mwp_v1 branch
Browse files Browse the repository at this point in the history
  • Loading branch information
aamirw committed Oct 4, 2023
2 parents bb7c991 + c5c7e5f commit a233c53
Show file tree
Hide file tree
Showing 447 changed files with 14,300 additions and 6,212 deletions.
23 changes: 18 additions & 5 deletions .github/workflows/backend_e2e_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,21 @@ jobs:
run-end2end-tests:
runs-on: ubuntu-latest
steps:
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
with:
# if set to "true", this frees about 6 GB but might remove
# tools that are actually needed
tool-cache: false

# all of these default to true, but feel free to set to
# "false" if necessary for your workflow
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: true
swap-storage: false
- name: Set Swap Space to 10GB
uses: pierotofy/set-swap-space@master
with:
Expand All @@ -54,12 +69,9 @@ jobs:
chmod -R a+rwx backend_repo/ models_cache/ spacy_models/ tika/
python monkey_patch_docker_compose_for_backend_tests.py
export GID=$(id -g)
export CELERY_TEXT_WORKER_CONCURRENCY=1
export CELERY_IMAGE_WORKER_CONCURRENCY=1
export CELERY_SIMSEARCH_WORKER_CONCURRENCY=1
export CELERY_ARCHIVE_WORKER_CONCURRENCY=1
export API_PRODUCTION_WORKERS=0
docker compose -f compose-test.yml up -d --quiet-pull
export RAY_CONFIG="./config_test_no_gpu.yaml"
docker compose -f compose-test.yml up -d
echo Waiting for containers to start...
sleep 240
cd ..
Expand All @@ -68,6 +80,7 @@ jobs:
TESTDATA_PASSWORD: ${{ secrets.TESTDATA_PASSWORD }}
run: |
cd tools/importer
pip install -r requirements.txt
wget -q http://ltdata1.informatik.uni-hamburg.de/dwts/totalitarismo.zip
unzip -q -P "$TESTDATA_PASSWORD" totalitarismo.zip
python dwts_importer.py --input_dir images --backend_url http://localhost:13120/ --project_name incel --tag_name totalitarisimo
Expand Down
20 changes: 16 additions & 4 deletions .github/workflows/backend_unit_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,21 @@ jobs:
run-unit-tests:
runs-on: ubuntu-latest
steps:
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
with:
# if set to "true", this frees about 6 GB but might remove
# tools that are actually needed
tool-cache: false

# all of these default to true, but feel free to set to
# "false" if necessary for your workflow
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: true
swap-storage: false
- name: Set Swap Space to 10GB
uses: pierotofy/set-swap-space@master
with:
Expand All @@ -61,11 +76,8 @@ jobs:
chmod -R a+rwx backend_repo/ models_cache/ spacy_models/ tika/
python monkey_patch_docker_compose_for_backend_tests.py
export GID=$(id -g)
export CELERY_TEXT_WORKER_CONCURRENCY=1
export CELERY_IMAGE_WORKER_CONCURRENCY=1
export CELERY_SIMSEARCH_WORKER_CONCURRENCY=1
export CELERY_ARCHIVE_WORKER_CONCURRENCY=1
export API_PRODUCTION_WORKERS=0
export RAY_CONFIG="./config_test_no_gpu.yaml"
docker compose -f compose-test.yml up -d --quiet-pull
echo Waiting for containers to start...
sleep 240
Expand Down
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
ci:
skip: [eslint]
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.3.0
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# D-WISE Tool Suite

[![pre-commit.ci status](https://results.pre-commit.ci/badge/github/uhh-lt/dwts/mwp_v1.svg)](https://results.pre-commit.ci/latest/github/uhh-lt/dwts/mwp_v1)

This is the repository for the D-WISE Tool Suite (DWTS) - an outcome of
the [D-WISE Project](https://www.dwise.uni-hamburg.de/)

Expand Down
34 changes: 19 additions & 15 deletions backend/.env
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,9 @@ INSTALL_JUPYTER=true
API_PORT=5500
API_PRODUCTION_MODE=0
API_PRODUCTION_WORKERS=10
CELERY_TEXT_WORKER_CONCURRENCY=10
CELERY_IMAGE_WORKER_CONCURRENCY=1
CELERY_AUDIO_WORKER_CONCURRENCY=1
CELERY_VIDEO_WORKER_CONCURRENCY=1
CELERY_SIMSEARCH_WORKER_CONCURRENCY=1
CELERY_HEAVY_JOBS_WORKER_CONCURRENCY=1
CELERY_BACKGROUND_JOBS_WORKER_CONCURRENCY=1
CELERY_DEBUG_MODE=0

REDIS_HOST=redis
REDIS_PORT=6379
Expand All @@ -36,6 +33,10 @@ ES_HOST=elasticsearch
ES_PORT=9200
ES_MIN_HEALTH=50

RAY_HOST=ray
RAY_PORT=8000
RAY_PROTOCOL=http

API_EXPOSED=13120
POSTGRES_EXPOSED=13121
RABBIT1_EXPOSED=13123
Expand All @@ -44,16 +45,19 @@ RABBIT3_EXPOSED=13125
RABBIT_EXPOSED=13126
REDIS_EXPOSED=13127

JUPYTER_TEXT_EXPOSED=13128
JUPYTER_IMAGE_EXPOSED=13129
JUPYTER_SIMSEARCH_EXPOSED=13136
JUPYTER_HEAVY_JOBS_EXPOSED=13135
JUPYTER_API_EXPOSED=13130
KIBANA_EXPOSED=13128
ELASTICSEARCH_EXPOSED=13129
ELASTICSEARCH1_EXPOSED=13130
CONTENT_SERVER_EXPOSED=13131

RAY_API_EXPOSED=13132
RAY_DASHBOARD_EXPOSED=13133

KIBANA_EXPOSED=13131
ELASTICSEARCH_EXPOSED=13132
ELASTICSEARCH1_EXPOSED=13133
CONTENT_SERVER_EXPOSED=13134
JUPYTER_TEXT_EXPOSED=13134
JUPYTER_IMAGE_EXPOSED=13135
JUPYTER_API_EXPOSED=13136
JUPYTER_SIMSEARCH_EXPOSED=13137
JUPYTER_BACKGROUND_JOBS_EXPOSED=13138

# MAIL SERVICE
MAIL_ENABLED=True
Expand All @@ -65,4 +69,4 @@ MAIL_PORT=587
MAIL_STARTTLS=True
MAIL_SSL_TLS=False
MAIL_USE_CREDENTIALS=True
MAIL_VALIDATE_CERTS=False
MAIL_VALIDATE_CERTS=False
31 changes: 31 additions & 0 deletions backend/.env.dev
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
DWISE_BACKEND_CONFIG=configs/default_localhost_dev.yaml
LOG_LEVEL=debug
JWT_TTL=10080
JWT_SECRET=f5b73acd6d6776350bce963bbcd2853fb5de00a4a4a081280ce1123b4a69aea9

API_PORT=33120
API_PRODUCTION_MODE=0
API_PRODUCTION_WORKERS=10

CELERY_DEBUG_MODE=1

REDIS_HOST=localhost
REDIS_PORT=13124
REDIS_PASSWORD=dwts123

RABBITMQ_HOST=localhost
RABBITMQ_PORT=13123
RABBITMQ_USER=dwtsuser
RABBITMQ_PASSWORD=dwts123

POSTGRES_HOST=localhost
POSTGRES_PORT=13122
POSTGRES_DB=dwts
POSTGRES_USER=dwtsuser
POSTGRES_PASSWORD=dwts123

FLOWER_BASIC_AUTH=dwtsuser:dwts123

ES_HOST=localhost
ES_PORT=13125
ES_MIN_HEALTH=50
1 change: 1 addition & 0 deletions backend/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
src/app/preprocessing/ray_model_worker/spec.yaml
src/dev_notebooks
backend_repo
sample_data
Expand Down
7 changes: 4 additions & 3 deletions backend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,19 @@ ENV TIKA_PATH=/tika
RUN wget https://repo1.maven.org/maven2/org/apache/tika/tika-server/${TIKA_VERSION}/tika-server-${TIKA_VERSION}.jar -O /opt/tika.jar -q &&\
wget https://repo1.maven.org/maven2/org/apache/tika/tika-server/${TIKA_VERSION}/tika-server-${TIKA_VERSION}.jar.md5 -O /opt/tika.jar.md5 -q

# install micromamba for anaconda python package management
# install micromamba for anaconda python package management
ARG MICROMAMBA_VERSION=0.27.0
RUN wget -q https://micro.mamba.pm/api/micromamba/linux-64/${MICROMAMBA_VERSION} -O mamba.tar.bz2 && tar -xvj -f mamba.tar.bz2 bin/micromamba && rm mamba.tar.bz2

# create the 'dwts' python environment with all dependencies
ENV MAMBA_ROOT_PREFIX=/opt
COPY environment.yml .
COPY requirements.txt /tmp/requirements.txt
RUN micromamba create -f environment.yml -q -y &&\
micromamba clean -a -f -q -y &&\
find /opt/ -follow -type f -name '*.a' -delete &&\
find /opt/ -follow -type f -name '*.js.map' -delete &&\
rm -r /root/.cache
rm -r /root/.cache


# These settings & scripts are needed to automatically start any CMD in the container with the python environment
Expand All @@ -47,7 +48,7 @@ SHELL ["/usr/local/bin/_dockerfile_shell.sh"]



# set up python env variables
# set up python env variables
ARG DWTS_ENV
ENV DWTS_ENV=${DWTS_ENV} \
PYTHONFAULTHANDLER=1 \
Expand Down
2 changes: 1 addition & 1 deletion backend/_activate_current_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,4 @@ fi
# For robustness, try all possible activate commands.
conda activate "${ENV_NAME}" 2>/dev/null \
|| mamba activate "${ENV_NAME}" 2>/dev/null \
|| micromamba activate "${ENV_NAME}"
|| micromamba activate "${ENV_NAME}"
2 changes: 1 addition & 1 deletion backend/_dockerfile_shell.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ if [[ "${MAMBA_DOCKERFILE_ACTIVATE}" == "1" ]]; then
source _activate_current_env.sh
fi

exec bash -o pipefail -c "$@"
exec bash -o pipefail -c "$@"
2 changes: 1 addition & 1 deletion backend/_entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ fi

source _activate_current_env.sh

exec "$@"
exec "$@"
16 changes: 4 additions & 12 deletions backend/environment.yml
Original file line number Diff line number Diff line change
@@ -1,20 +1,14 @@
name: dwts
channels:
- defaults
- huggingface
- pytorch
- fastai
- conda-forge
- defaults
dependencies:
- python=3.10
- pytorch::pytorch=1.12
- conda-forge::cudatoolkit=11.6
- sentence-transformers=2.2
- huggingface::transformers=4.21
- conda-forge::pip=23.2.1
- pydantic=1.8
- spacy=3.4
- cupy=11.2
- fastapi=0.85
- srsly=2.4.8
- tqdm=4.66.1
- sqlalchemy=1.4
- psycopg2-binary=2.9
- redis-py=4.3
Expand All @@ -32,9 +26,7 @@ dependencies:
- frozendict=2.3
- email_validator=1.3
- sqlalchemy-utils=0.38
- timm=0.6
- python-multipart=0.0.5
- spacy-transformers=1.1
- ftfy=6.1
- beautifulsoup4=4.11.1
- pytest=7.2.0
Expand Down
4 changes: 2 additions & 2 deletions backend/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
faiss-gpu==1.7.2
weaviate-client==3.24.1
fastapi-mail==1.2.5
git+https://github.com/linto-ai/whisper-timestamped.git@d767f4fc3b401c78c20d55515b382838ca3c86aa
multiprocess==0.70.15
Scrapy==2.10.0
scrapy-playwright==0.0.31
cssselect==1.2.0
Expand Down
13 changes: 6 additions & 7 deletions backend/src/api/dependencies.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
from typing import Dict, Generator, Optional

from fastapi import Depends, Query
from fastapi.security import OAuth2PasswordBearer
from jose import JWTError
from pydantic import ValidationError
from sqlalchemy.orm import Session

from api.util import credentials_exception
from app.core.data.crud.user import crud_user
from app.core.data.dto.user import UserRead
from app.core.db.sql_service import SQLService
from app.core.security import decode_jwt
from config import conf
from fastapi import Depends, Query
from fastapi.security import OAuth2PasswordBearer
from jose import JWTError
from pydantic import ValidationError
from sqlalchemy.orm import Session

# instantiate here so that it is reusable for consecutive calls
reusable_oauth2_scheme = OAuth2PasswordBearer(tokenUrl=conf.api.auth.jwt.token_url)
Expand Down Expand Up @@ -64,7 +63,7 @@ async def get_current_user(
email: str = payload.get("sub")
if email is None:
raise credentials_exception
except (JWTError, ValidationError) as e:
except (JWTError, ValidationError):
raise credentials_exception

user = crud_user.read_by_email(db=db, email=email)
Expand Down
Loading

0 comments on commit a233c53

Please sign in to comment.