From 03bf0fd7e42fe64b9c2dc234b9ee9433f62401bc Mon Sep 17 00:00:00 2001
From: Jonathan Perry
Date: Tue, 3 Sep 2024 11:20:11 -0400
Subject: [PATCH] chore: add logger configuration to our various main
 entrypoints (#918)

* fix: remove erroneous logging config setting

* chore: set logging basicConfig in main files

* chore: avoid using root logger

* chore: add ability to set lfai log level based on env var

Signed-off-by: Jon Perry

---------

Signed-off-by: Jon Perry
---
 packages/llama-cpp-python/main.py        |  9 ++++++++
 packages/text-embeddings/main.py         | 18 ++++++++++++++--
 packages/vllm/src/main.py                | 21 ++++++++++++-------
 packages/whisper/main.py                 |  5 ++++-
 src/leapfrogai_api/backend/rag/index.py  | 10 +++++----
 .../backend/rag/leapfrogai_embeddings.py |  4 +++-
 src/leapfrogai_api/main.py               | 18 +++++++++-------
 .../create_modify_assistant_request.py   |  8 ++++---
 .../run_create_params_request_base.py    |  8 ++++---
 .../thread_run_create_params_request.py  |  5 ++++-
 .../routers/openai/vector_stores.py      |  4 +++-
 .../routers/supabase_session.py          | 10 +++++----
 src/leapfrogai_api/utils/config.py       | 21 ++++++++++---------
 src/leapfrogai_sdk/config.py             |  4 +++-
 14 files changed, 98 insertions(+), 47 deletions(-)

diff --git a/packages/llama-cpp-python/main.py b/packages/llama-cpp-python/main.py
index 70b01cc66..dd5d52f02 100644
--- a/packages/llama-cpp-python/main.py
+++ b/packages/llama-cpp-python/main.py
@@ -1,3 +1,4 @@
+import logging
 import os
 from typing import Any, AsyncGenerator
 
@@ -10,6 +11,12 @@
     False if os.environ.get("GPU_ENABLED", "False").lower() != "true" else True
 )
 
+logging.basicConfig(
+    level=os.getenv("LFAI_LOG_LEVEL", logging.INFO),
+    format="%(name)s: %(asctime)s | %(levelname)s | %(filename)s:%(lineno)s >>> %(message)s",
+)
+logger = logging.getLogger(__name__)
+
 
 @LLM
 class Model:
@@ -27,6 +34,7 @@ class Model:
     async def generate(
         self, prompt: str, config: GenerationConfig
     ) -> AsyncGenerator[str, Any]:
+        logger.info("Begin generating streamed response")
         for res in self.llm(
             prompt,
             stream=True,
@@ -37,6 +45,7 @@ async def generate(
             stop=self.backend_config.stop_tokens,
         ):
             yield res["choices"][0]["text"]  # type: ignore
+        logger.info("Streamed response complete")
 
     async def count_tokens(self, raw_text: str) -> int:
         string_bytes: bytes = bytes(raw_text, "utf-8")
diff --git a/packages/text-embeddings/main.py b/packages/text-embeddings/main.py
index 0ad8ce824..1abd2521d 100644
--- a/packages/text-embeddings/main.py
+++ b/packages/text-embeddings/main.py
@@ -11,18 +11,32 @@
     serve,
 )
 
+logging.basicConfig(
+    level=os.getenv("LFAI_LOG_LEVEL", logging.INFO),
+    format="%(name)s: %(asctime)s | %(levelname)s | %(filename)s:%(lineno)s >>> %(message)s",
+)
+logger = logging.getLogger(__name__)
+
+
 model_dir = os.environ.get("LFAI_MODEL_PATH", ".model")
 model = INSTRUCTOR(model_dir)
 
 
 class InstructorEmbedding:
     async def CreateEmbedding(self, request: EmbeddingRequest, context: GrpcContext):
-        embeddings = model.encode(request.inputs)
+        logger.info(
+            f"processing CreateEmbedding request: char-length: {len(str(request.inputs))} word-count: {len(str(request.inputs).split())}"
+        )
+
+        embeddings = model.encode(request.inputs, show_progress_bar=True)
         embeddings = [Embedding(embedding=inner_list) for inner_list in embeddings]
+
+        logger.info(
+            f"finished processing CreateEmbedding request, created {len(embeddings)} embeddings"
+        )
         return EmbeddingResponse(embeddings=embeddings)
 
 
 if __name__ == "__main__":
-    logging.basicConfig(level=logging.INFO)
     asyncio.run(serve(InstructorEmbedding()))
 
diff --git a/packages/vllm/src/main.py b/packages/vllm/src/main.py
index 307cfbc91..6a530e4f0 100644
--- a/packages/vllm/src/main.py
+++ b/packages/vllm/src/main.py
@@ -30,6 +30,12 @@
 
 load_dotenv()
 
+logging.basicConfig(
+    level=os.getenv("LFAI_LOG_LEVEL", logging.INFO),
+    format="%(name)s: %(asctime)s | %(levelname)s | %(filename)s:%(lineno)s >>> %(message)s",
+)
+logger = logging.getLogger(__name__)
+
 
 def clamp(n: float | int, smallest: float | int, largest: float | int):
     return max(smallest, min(n, largest))
@@ -142,8 +148,6 @@ class Model:
     random_iterator: RandomAsyncIterator = RandomAsyncIterator([])
 
     def __init__(self):
-        logging.getLogger().setLevel(logging.DEBUG)
-
        # Background thread for managing output iteration
        _thread = threading.Thread(target=asyncio.run, args=(self.iterate_outputs(),))
        _thread.start()
@@ -184,7 +188,7 @@ async def iterate_outputs(self):
                 and request_output.finished
             ):
                 # Signal that the "generate" function can stop waiting for additional inputs
-                logging.info(
+                logger.info(
                     f"Generated {num_tokens_by_id[request_id]} tokens in {time.time() - t0_by_id[request_id]:.2f}s"
                 )
                 self.done_by_id[request_id] = True
@@ -234,12 +238,13 @@ async def create_response(
             max_tokens=config.max_new_tokens,
             skip_special_tokens=False,
         )
-        logging.debug(sampling_params)
-        logging.info(f"Begin generation for request {request_id}")
+        logger.info(f"Begin generation for request {request_id}")
+        logger.debug(f"{request_id} sampling_params: {sampling_params}")
+
         # Generate texts from the prompts. The output is a list of RequestOutput objects
         # that contain the prompt, generated text, and other information.
         gen_iter = self.engine.generate(prompt, sampling_params, request_id)
-        logging.info(f"Begin iteration for request {request_id}")
+        logger.info(f"Begin iteration for request {request_id}")
         self.random_iterator.add_iterator(gen_iter)
 
     async def generate_session(
@@ -273,7 +278,7 @@ async def generate(
         )
         _thread.start()
 
-        logging.info(f"Begin reading the output for request {request_id}")
+        logger.info(f"Begin reading the output for request {request_id}")
 
         while not self.done_by_id.get(request_id) or not self.is_queue_empty(
             request_id
@@ -283,7 +288,7 @@ async def generate(
                 result = self.delta_queue_by_id.get(request_id).get()
                 yield result
 
-        logging.info(f"Finished request {request_id}")
+        logger.info(f"Finished request {request_id}")
 
     async def count_tokens(self, raw_text: str) -> int:
         tokens: list[int] | list[str] = (await self.engine.get_tokenizer()).tokenize(
diff --git a/packages/whisper/main.py b/packages/whisper/main.py
index 467a7039a..e3d356ac9 100644
--- a/packages/whisper/main.py
+++ b/packages/whisper/main.py
@@ -7,6 +7,10 @@
 import leapfrogai_sdk as lfai
 from faster_whisper import WhisperModel
 
+logging.basicConfig(
+    level=os.getenv("LFAI_LOG_LEVEL", logging.INFO),
+    format="%(name)s: %(asctime)s | %(levelname)s | %(filename)s:%(lineno)s >>> %(message)s",
+)
 logger = logging.getLogger(__name__)
 
 model_path = os.environ.get("LFAI_MODEL_PATH", ".model")
@@ -107,7 +111,6 @@ def Name(self, request, context):
 
 
 async def main():
-    logging.basicConfig(level=logging.INFO)
     logger.info(f"GPU_ENABLED = {GPU_ENABLED}")
     await lfai.serve(Whisper())
 
diff --git a/src/leapfrogai_api/backend/rag/index.py b/src/leapfrogai_api/backend/rag/index.py
index ce4ade400..1fc0d7311 100644
--- a/src/leapfrogai_api/backend/rag/index.py
+++ b/src/leapfrogai_api/backend/rag/index.py
@@ -28,6 +28,8 @@
 from leapfrogai_api.data.crud_vector_content import CRUDVectorContent, Vector
 
 
+logger = logging.getLogger(__name__)
+
 # Allows for overwriting type of embeddings that will be instantiated
 embeddings_type: type[Embeddings] | type[LeapfrogAIEmbeddings] | None = (
     LeapfrogAIEmbeddings
@@ -56,13 +58,13 @@ async def index_file(self, vector_store_id: str, file_id: str) -> VectorStoreFil
         if await crud_vector_store_file.get(
             filters=FilterVectorStoreFile(vector_store_id=vector_store_id, id=file_id)
         ):
-            logging.error("File already indexed: %s", file_id)
+            logger.error("File already indexed: %s", file_id)
             raise FileAlreadyIndexedError("File already indexed")
 
         if not (
             await crud_vector_store.get(filters=FilterVectorStore(id=vector_store_id))
         ):
-            logging.error("Vector store doesn't exist: %s", vector_store_id)
+            logger.error("Vector store doesn't exist: %s", vector_store_id)
             raise ValueError("Vector store not found")
 
         crud_file_object = CRUDFileObject(db=self.db)
@@ -146,7 +148,7 @@ async def index_files(
                 )
                 responses.append(response)
             except FileAlreadyIndexedError:
-                logging.info("File %s already exists and cannot be re-indexed", file_id)
+                logger.info("File %s already exists and cannot be re-indexed", file_id)
                 continue
             except Exception as exc:
                 raise exc
@@ -267,7 +269,7 @@ async def modify_existing_vector_store(
                 object_=new_vector_store,
             )
         except Exception as exc:
-            logging.error(exc)
+            logger.error(exc)
             raise HTTPException(
                 status_code=status.HTTP_400_BAD_REQUEST,
                 detail="Unable to parse vector store request",
diff --git a/src/leapfrogai_api/backend/rag/leapfrogai_embeddings.py b/src/leapfrogai_api/backend/rag/leapfrogai_embeddings.py
index 0731a8e74..8274a5861 100644
--- a/src/leapfrogai_api/backend/rag/leapfrogai_embeddings.py
+++ b/src/leapfrogai_api/backend/rag/leapfrogai_embeddings.py
@@ -6,6 +6,8 @@
 from leapfrogai_api.backend.grpc_client import create_embeddings
 import logging
 
+logger = logging.getLogger(__name__)
+
 
 # Partially implements the Langchain Core Embeddings interface
 class LeapfrogAIEmbeddings:
@@ -61,7 +63,7 @@ async def _get_model(
         """
 
         if not (model := get_model_config().get_model_backend(model=model_name)):
-            logging.error(f"Embeddings model {model_name} not found.")
+            logger.error(f"Embeddings model {model_name} not found.")
             raise ValueError("Embeddings model not found.")
 
         return model
diff --git a/src/leapfrogai_api/main.py b/src/leapfrogai_api/main.py
index fa0a1e056..c3c806dbd 100644
--- a/src/leapfrogai_api/main.py
+++ b/src/leapfrogai_api/main.py
@@ -2,6 +2,7 @@
 
 import asyncio
 import logging
+import os
 from contextlib import asynccontextmanager
 
 from fastapi import FastAPI
@@ -28,17 +29,23 @@
 )
 from leapfrogai_api.utils import get_model_config
 
+logging.basicConfig(
+    level=os.getenv("LFAI_LOG_LEVEL", logging.INFO),
+    format="%(name)s: %(asctime)s | %(levelname)s | %(filename)s:%(lineno)s >>> %(message)s",
+)
+logger = logging.getLogger(__name__)
+
 
 # handle startup & shutdown tasks
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     """Handle startup and shutdown tasks for the FastAPI app."""
     # startup
-    logging.info("Starting to watch for configs")
+    logger.info("Starting to watch for configs")
     asyncio.create_task(get_model_config().watch_and_load_configs())
     yield
     # shutdown
-    logging.info("Clearing model configs")
+    logger.info("Clearing model configs")
     asyncio.create_task(get_model_config().clear_all_models())
 
 
@@ -47,15 +54,10 @@ async def lifespan(app: FastAPI):
 
 @app.exception_handler(RequestValidationError)
 async def validation_exception_handler(request, exc):
-    logging.error(f"The client sent invalid data!: {exc}")
+    logger.error(f"The client sent invalid data!: {exc}")
     return await request_validation_exception_handler(request, exc)
 
 
-logger = logging.getLogger(__name__)
-handler = logging.StreamHandler()
-handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
-logger.addHandler(handler)
-
 app.include_router(base_router)
 app.include_router(auth.router)
 app.include_router(models.router)
diff --git a/src/leapfrogai_api/routers/openai/requests/create_modify_assistant_request.py b/src/leapfrogai_api/routers/openai/requests/create_modify_assistant_request.py
index 28a10b033..8a0791300 100644
--- a/src/leapfrogai_api/routers/openai/requests/create_modify_assistant_request.py
+++ b/src/leapfrogai_api/routers/openai/requests/create_modify_assistant_request.py
@@ -22,6 +22,8 @@
 )
 from openai.types.beta.assistant_tool import FileSearchTool
 
+logger = logging.getLogger(__name__)
+
 
 class CreateAssistantRequest(BaseModel):
     """Request object for creating an assistant."""
@@ -95,7 +97,7 @@ async def request_checks_and_modifications(self, session: Session):
         """
 
         async def new_vector_store_from_file_ids():
-            logging.debug("Creating vector store for new assistant")
+            logger.debug("Creating vector store for new assistant")
             indexing_service = IndexingService(db=session)
 
             vector_store_params_dict = vector_stores[0]
@@ -127,7 +129,7 @@ async def new_vector_store_from_file_ids():
             self.tool_resources.file_search.vector_store_ids = [vector_store.id]
 
         async def attach_existing_vector_store_from_id():
-            logging.debug(
+            logger.debug(
                 "Attaching vector store with id: {} to new assistant".format(ids[0])
             )
             crud_vector_store = CRUDVectorStore(db=session)
@@ -189,7 +191,7 @@ async def attach_existing_vector_store_from_id():
 
         # nothing provided, no changes made
         else:
-            logging.debug(
+            logger.debug(
                 "No files or vector store id found; assistant will be created with no vector store"
             )
 
diff --git a/src/leapfrogai_api/routers/openai/requests/run_create_params_request_base.py b/src/leapfrogai_api/routers/openai/requests/run_create_params_request_base.py
index 3f9834340..0770178f6 100644
--- a/src/leapfrogai_api/routers/openai/requests/run_create_params_request_base.py
+++ b/src/leapfrogai_api/routers/openai/requests/run_create_params_request_base.py
@@ -69,6 +69,8 @@
     ChatCompletionResponse as ProtobufChatCompletionResponse,
 )
 
+logger = logging.getLogger(__name__)
+
 
 class RunCreateParamsRequestBase(BaseModel):
     assistant_id: str = Field(default="", examples=["123ab"])
@@ -101,13 +103,13 @@ def __init__(self, **data):
         super().__init__(**data)
         # TODO: Temporary fix to ensure max_completion_tokens and max_prompt_tokens are set
         if self.max_completion_tokens is None or self.max_completion_tokens < 1:
-            logging.warning(
+            logger.warning(
                 "max_completion_tokens is not set or is less than 1, setting to %s",
                 DEFAULT_MAX_COMPLETION_TOKENS,
             )
             self.max_completion_tokens = DEFAULT_MAX_COMPLETION_TOKENS
         if self.max_prompt_tokens is None or self.max_prompt_tokens < 1:
-            logging.warning(
+            logger.warning(
                 "max_prompt_tokens is not set or is less than 1, setting to %s",
                 DEFAULT_MAX_PROMPT_TOKENS,
             )
@@ -170,7 +172,7 @@ def can_use_rag(
                 return self.tool_choice.type == "file_search"
         except ValidationError:
             traceback.print_exc()
-            logging.error(
+            logger.error(
                 "Cannot use RAG for request, failed to validate tool for thread"
             )
             return False
diff --git a/src/leapfrogai_api/routers/openai/requests/thread_run_create_params_request.py b/src/leapfrogai_api/routers/openai/requests/thread_run_create_params_request.py
index 0eaf6460c..eb38b8406 100644
--- a/src/leapfrogai_api/routers/openai/requests/thread_run_create_params_request.py
+++ b/src/leapfrogai_api/routers/openai/requests/thread_run_create_params_request.py
@@ -39,6 +39,9 @@
 
 from leapfrogai_api.routers.supabase_session import Session
 
+logger = logging.getLogger(__name__)
+
+
 class ThreadRunCreateParamsRequest(RunCreateParamsRequestBase):
     thread: ThreadCreateAndRunsThread | None = Field(
         default=None,
@@ -112,7 +115,7 @@ async def create_thread_request(self) -> CreateThreadRequest:
 
                     thread_request.messages.append(new_message)
                 except ValueError as exc:
-                    logging.error(f"\t{exc}")
+                    logger.error(f"\t{exc}")
                     continue
 
         return thread_request
diff --git a/src/leapfrogai_api/routers/openai/vector_stores.py b/src/leapfrogai_api/routers/openai/vector_stores.py
index 765d07429..0384e7928 100644
--- a/src/leapfrogai_api/routers/openai/vector_stores.py
+++ b/src/leapfrogai_api/routers/openai/vector_stores.py
@@ -22,6 +22,8 @@
 )
 from leapfrogai_api.routers.supabase_session import Session
 
+logger = logging.getLogger(__name__)
+
 router = APIRouter(prefix="/openai/v1/vector_stores", tags=["openai/vector_stores"])
 
 
@@ -128,7 +130,7 @@ async def create_vector_store_file(
         )
         return vector_store_file
     except Exception as exc:
-        logging.exception("Error indexing file")
+        logger.exception("Error indexing file")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
             detail="Failed to create vector store file",
diff --git a/src/leapfrogai_api/routers/supabase_session.py b/src/leapfrogai_api/routers/supabase_session.py
index f442060f3..400974066 100644
--- a/src/leapfrogai_api/routers/supabase_session.py
+++ b/src/leapfrogai_api/routers/supabase_session.py
@@ -12,6 +12,8 @@
 import gotrue
 from leapfrogai_api.backend.security.api_key import APIKey
 
+logger = logging.getLogger(__name__)
+
 security = HTTPBearer()
 
 
@@ -61,19 +63,19 @@ async def init_supabase_client(
             access_token=auth_creds.credentials, refresh_token="dummy"
         )
     except gotrue.errors.AuthApiError as e:
-        logging.exception("\t%s", e)
+        logger.exception("\t%s", e)
         raise HTTPException(
             detail="Token has expired or is not valid. Generate a new token",
             status_code=status.HTTP_401_UNAUTHORIZED,
         ) from e
     except binascii.Error as e:
-        logging.exception("\t%s", e)
+        logger.exception("\t%s", e)
         raise HTTPException(
             detail="Failed to validate Authentication Token",
             status_code=status.HTTP_401_UNAUTHORIZED,
         ) from e
     except Exception as e:
-        logging.exception("\t%s", e)
+        logger.exception("\t%s", e)
         raise HTTPException(
             detail="Failed to create Supabase session",
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
@@ -97,7 +99,7 @@ async def init_supabase_client(
 
         return client
     except ValueError as e:
-        logging.exception("\t%s", e)
+        logger.exception("\t%s", e)
         raise HTTPException(
             detail="Failed to validate API Key",
             status_code=status.HTTP_401_UNAUTHORIZED,
diff --git a/src/leapfrogai_api/utils/config.py b/src/leapfrogai_api/utils/config.py
index 9bb1a00a7..60edfb3e8 100644
--- a/src/leapfrogai_api/utils/config.py
+++ b/src/leapfrogai_api/utils/config.py
@@ -8,7 +8,8 @@
 import yaml
 from watchfiles import Change, awatch
 
-logging.basicConfig(level=logging.INFO)
+
+logger = logging.getLogger(__name__)
 
 
 class Model:
@@ -50,7 +51,7 @@ async def watch_and_load_configs(self, directory=".", filename="config.yaml"):
         async for changes in awatch(directory, recursive=False, step=50):
             # get two unique lists of files that have been (updated files and deleted files)
             # (awatch can return duplicates depending on the type of updates that happen)
-            logging.info("Config changes detected: {}".format(changes))
+            logger.info("Config changes detected: {}".format(changes))
             unique_new_files = set()
             unique_deleted_files = set()
             for change in changes:
@@ -77,10 +78,10 @@ async def clear_all_models(self):
         # reset the model config on shutdown (so old model configs don't get cached)
         self.models = {}
         self.config_sources = {}
-        logging.info("All models have been removed")
+        logger.info("All models have been removed")
 
     def load_config_file(self, directory: str, config_file: str):
-        logging.info("Loading config file: {}/{}".format(directory, config_file))
+        logger.info("Loading config file: {}/{}".format(directory, config_file))
 
         # load the config file into the config object
         config_path = os.path.join(directory, config_file)
@@ -93,25 +94,25 @@ def load_config_file(self, directory: str, config_file: str):
                 loaded_artifact = yaml.safe_load(c)
             else:
                 # TODO: Return an error ???
-                logging.error(f"Unsupported file type: {config_path}")
+                logger.error(f"Unsupported file type: {config_path}")
                 return
 
         # parse the object into our config
         self.parse_models(loaded_artifact, config_file)
 
-        logging.info("loaded artifact at {}".format(config_path))
+        logger.info("loaded artifact at {}".format(config_path))
 
         return
 
     def load_all_configs(self, directory="", filename="config.yaml"):
-        logging.info(
+        logger.info(
             "Loading all configs in {} that match the name '{}'".format(
                 directory, filename
             )
         )
 
         if not os.path.exists(directory):
-            logging.error("The config directory ({}) does not exist".format(directory))
+            logger.error("The config directory ({}) does not exist".format(directory))
             return "THE CONFIG DIRECTORY DOES NOT EXIST"
 
         # Get all config files and load them into the config object
@@ -137,12 +138,12 @@ def parse_models(self, loaded_artifact, config_file):
                     self.config_sources[config_file].append(m["name"])
                 except KeyError:
                     self.config_sources[config_file] = [m["name"]]
-            logging.info("added {} to model config".format(m["name"]))
+            logger.info("added {} to model config".format(m["name"]))
 
     def remove_model_by_config(self, config_file):
         for model_name in self.config_sources[config_file]:
             self.models.pop(model_name)
-            logging.info("removed {} from model config".format(model_name))
+            logger.info("removed {} from model config".format(model_name))
 
         # clear config once all corresponding models are deleted
         self.config_sources.pop(config_file)
diff --git a/src/leapfrogai_sdk/config.py b/src/leapfrogai_sdk/config.py
index 4932bef17..70b81bfcc 100644
--- a/src/leapfrogai_sdk/config.py
+++ b/src/leapfrogai_sdk/config.py
@@ -6,6 +6,8 @@
 
 from leapfrogai_sdk import ChatItem, ChatRole
 
+logger = logging.getLogger(__name__)
+
 
 class LLMDefaults(BaseConfig):
     temperature: float = 0.5
@@ -58,7 +60,7 @@ def apply_chat_template(
             elif item.role == ChatRole.USER:
                 prompt += self.prompt_format.chat.user.format(item.content)
             elif item.role == ChatRole.FUNCTION:
-                logging.warning(
+                logger.warning(
                     "ChatRole FUNCTION is not implemented for this model and this ChatItem will be ignored."
                 )
         # add the response prefix to start the model's reponse
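
A note on the pattern introduced above: logging.basicConfig() accepts either a numeric level or an upper-case level name, so the new LFAI_LOG_LEVEL environment variable (for example DEBUG or WARNING) changes a service's verbosity at startup without code changes, and the numeric logging.INFO default applies when the variable is unset. The snippet below is a minimal standalone sketch of that same pattern, for illustration only; the module, messages, and invocation shown are not part of the patch.

    import logging
    import os

    # Both a level name string from the environment ("DEBUG", "WARNING", ...)
    # and the numeric logging.INFO fallback are accepted by basicConfig().
    logging.basicConfig(
        level=os.getenv("LFAI_LOG_LEVEL", logging.INFO),
        format="%(name)s: %(asctime)s | %(levelname)s | %(filename)s:%(lineno)s >>> %(message)s",
    )
    logger = logging.getLogger(__name__)

    logger.debug("emitted only when LFAI_LOG_LEVEL=DEBUG")
    logger.info("emitted at the default INFO level")

Running an entrypoint with the variable set (for example, LFAI_LOG_LEVEL=DEBUG python main.py, invocation illustrative) would then surface the per-request debug lines added in packages/vllm/src/main.py.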