chore: add logger configuration to our various main entrypoints (#918)
* fix: remove erroneous logging config setting
* chore: set logging basicConfig in main files
* chore: avoid using root logger
* chore: add ability to set lfai log level based on env var

Signed-off-by: Jon Perry <yrrepnoj@gmail.com>

---------

Signed-off-by: Jon Perry <yrrepnoj@gmail.com>
YrrepNoj committed Sep 3, 2024
1 parent cef1535 commit 03bf0fd
Showing 14 changed files with 98 additions and 47 deletions.
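
All of the entrypoints touched here converge on the same pattern: configure logging once at import time with logging.basicConfig, read the level from the LFAI_LOG_LEVEL environment variable, and emit through a named module logger rather than the root logger. A minimal sketch of that pattern, assuming only the format string and variable name shown in the diffs below (the final info call is illustrative):

import logging
import os

# LFAI_LOG_LEVEL, when set, arrives as a level name such as "DEBUG";
# logging accepts the string as well as the integer default logging.INFO.
logging.basicConfig(
    level=os.getenv("LFAI_LOG_LEVEL", logging.INFO),
    format="%(name)s: %(asctime)s | %(levelname)s | %(filename)s:%(lineno)s >>> %(message)s",
)

# Named module logger; nothing below logs through the root logger directly.
logger = logging.getLogger(__name__)
logger.info("entrypoint logging configured")

Because the level falls through to basicConfig, exporting LFAI_LOG_LEVEL=DEBUG before starting any of these services should raise their verbosity without further code changes.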
9 changes: 9 additions & 0 deletions packages/llama-cpp-python/main.py
@@ -1,3 +1,4 @@
import logging
import os
from typing import Any, AsyncGenerator

@@ -10,6 +11,12 @@
False if os.environ.get("GPU_ENABLED", "False").lower() != "true" else True
)

logging.basicConfig(
level=os.getenv("LFAI_LOG_LEVEL", logging.INFO),
format="%(name)s: %(asctime)s | %(levelname)s | %(filename)s:%(lineno)s >>> %(message)s",
)
logger = logging.getLogger(__name__)


@LLM
class Model:
@@ -27,6 +34,7 @@ class Model:
async def generate(
self, prompt: str, config: GenerationConfig
) -> AsyncGenerator[str, Any]:
logger.info("Begin generating streamed response")
for res in self.llm(
prompt,
stream=True,
@@ -37,6 +45,7 @@ async def generate(
stop=self.backend_config.stop_tokens,
):
yield res["choices"][0]["text"] # type: ignore
logger.info("Streamed response complete")

async def count_tokens(self, raw_text: str) -> int:
string_bytes: bytes = bytes(raw_text, "utf-8")
18 changes: 16 additions & 2 deletions packages/text-embeddings/main.py
@@ -11,18 +11,32 @@
serve,
)

logging.basicConfig(
level=os.getenv("LFAI_LOG_LEVEL", logging.INFO),
format="%(name)s: %(asctime)s | %(levelname)s | %(filename)s:%(lineno)s >>> %(message)s",
)
logger = logging.getLogger(__name__)


model_dir = os.environ.get("LFAI_MODEL_PATH", ".model")
model = INSTRUCTOR(model_dir)


class InstructorEmbedding:
async def CreateEmbedding(self, request: EmbeddingRequest, context: GrpcContext):
embeddings = model.encode(request.inputs)
logger.info(
f"processing CreateEmbedding request: char-length: {len(str(request.inputs))} word-count: {len(str(request.inputs).split())}"
)

embeddings = model.encode(request.inputs, show_progress_bar=True)

embeddings = [Embedding(embedding=inner_list) for inner_list in embeddings]

logger.info(
f"finished processing CreateEmbedding request, created {len(embeddings)} embeddings"
)
return EmbeddingResponse(embeddings=embeddings)


if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
asyncio.run(serve(InstructorEmbedding()))
21 changes: 13 additions & 8 deletions packages/vllm/src/main.py
@@ -30,6 +30,12 @@

load_dotenv()

logging.basicConfig(
level=os.getenv("LFAI_LOG_LEVEL", logging.INFO),
format="%(name)s: %(asctime)s | %(levelname)s | %(filename)s:%(lineno)s >>> %(message)s",
)
logger = logging.getLogger(__name__)


def clamp(n: float | int, smallest: float | int, largest: float | int):
return max(smallest, min(n, largest))
@@ -142,8 +148,6 @@ class Model:
random_iterator: RandomAsyncIterator = RandomAsyncIterator([])

def __init__(self):
logging.getLogger().setLevel(logging.DEBUG)

# Background thread for managing output iteration
_thread = threading.Thread(target=asyncio.run, args=(self.iterate_outputs(),))
_thread.start()
@@ -184,7 +188,7 @@ async def iterate_outputs(self):
and request_output.finished
):
# Signal that the "generate" function can stop waiting for additional inputs
logging.info(
logger.info(
f"Generated {num_tokens_by_id[request_id]} tokens in {time.time() - t0_by_id[request_id]:.2f}s"
)
self.done_by_id[request_id] = True
@@ -234,12 +238,13 @@ async def create_response(
max_tokens=config.max_new_tokens,
skip_special_tokens=False,
)
logging.debug(sampling_params)
logging.info(f"Begin generation for request {request_id}")
logger.info(f"Begin generation for request {request_id}")
logger.debug(f"{request_id} sampling_params: {sampling_params}")

# Generate texts from the prompts. The output is a list of RequestOutput objects
# that contain the prompt, generated text, and other information.
gen_iter = self.engine.generate(prompt, sampling_params, request_id)
logging.info(f"Begin iteration for request {request_id}")
logger.info(f"Begin iteration for request {request_id}")
self.random_iterator.add_iterator(gen_iter)

async def generate_session(
Expand Down Expand Up @@ -273,7 +278,7 @@ async def generate(
)
_thread.start()

logging.info(f"Begin reading the output for request {request_id}")
logger.info(f"Begin reading the output for request {request_id}")

while not self.done_by_id.get(request_id) or not self.is_queue_empty(
request_id
@@ -283,7 +288,7 @@
result = self.delta_queue_by_id.get(request_id).get()
yield result

logging.info(f"Finished request {request_id}")
logger.info(f"Finished request {request_id}")

async def count_tokens(self, raw_text: str) -> int:
tokens: list[int] | list[str] = (await self.engine.get_tokenizer()).tokenize(
5 changes: 4 additions & 1 deletion packages/whisper/main.py
@@ -7,6 +7,10 @@
import leapfrogai_sdk as lfai
from faster_whisper import WhisperModel

logging.basicConfig(
level=os.getenv("LFAI_LOG_LEVEL", logging.INFO),
format="%(name)s: %(asctime)s | %(levelname)s | %(filename)s:%(lineno)s >>> %(message)s",
)
logger = logging.getLogger(__name__)

model_path = os.environ.get("LFAI_MODEL_PATH", ".model")
@@ -107,7 +111,6 @@ def Name(self, request, context):


async def main():
logging.basicConfig(level=logging.INFO)
logger.info(f"GPU_ENABLED = {GPU_ENABLED}")
await lfai.serve(Whisper())

10 changes: 6 additions & 4 deletions src/leapfrogai_api/backend/rag/index.py
@@ -28,6 +28,8 @@

from leapfrogai_api.data.crud_vector_content import CRUDVectorContent, Vector

logger = logging.getLogger(__name__)

# Allows for overwriting type of embeddings that will be instantiated
embeddings_type: type[Embeddings] | type[LeapfrogAIEmbeddings] | None = (
LeapfrogAIEmbeddings
@@ -56,13 +58,13 @@ async def index_file(self, vector_store_id: str, file_id: str) -> VectorStoreFil
if await crud_vector_store_file.get(
filters=FilterVectorStoreFile(vector_store_id=vector_store_id, id=file_id)
):
logging.error("File already indexed: %s", file_id)
logger.error("File already indexed: %s", file_id)
raise FileAlreadyIndexedError("File already indexed")

if not (
await crud_vector_store.get(filters=FilterVectorStore(id=vector_store_id))
):
logging.error("Vector store doesn't exist: %s", vector_store_id)
logger.error("Vector store doesn't exist: %s", vector_store_id)
raise ValueError("Vector store not found")

crud_file_object = CRUDFileObject(db=self.db)
@@ -146,7 +148,7 @@ async def index_files(
)
responses.append(response)
except FileAlreadyIndexedError:
logging.info("File %s already exists and cannot be re-indexed", file_id)
logger.info("File %s already exists and cannot be re-indexed", file_id)
continue
except Exception as exc:
raise exc
@@ -267,7 +269,7 @@ async def modify_existing_vector_store(
object_=new_vector_store,
)
except Exception as exc:
logging.error(exc)
logger.error(exc)
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Unable to parse vector store request",
4 changes: 3 additions & 1 deletion src/leapfrogai_api/backend/rag/leapfrogai_embeddings.py
@@ -6,6 +6,8 @@
from leapfrogai_api.backend.grpc_client import create_embeddings
import logging

logger = logging.getLogger(__name__)


# Partially implements the Langchain Core Embeddings interface
class LeapfrogAIEmbeddings:
@@ -61,7 +63,7 @@ async def _get_model(
"""

if not (model := get_model_config().get_model_backend(model=model_name)):
logging.error(f"Embeddings model {model_name} not found.")
logger.error(f"Embeddings model {model_name} not found.")
raise ValueError("Embeddings model not found.")

return model
18 changes: 10 additions & 8 deletions src/leapfrogai_api/main.py
@@ -2,6 +2,7 @@

import asyncio
import logging
import os
from contextlib import asynccontextmanager

from fastapi import FastAPI
@@ -28,17 +29,23 @@
)
from leapfrogai_api.utils import get_model_config

logging.basicConfig(
level=os.getenv("LFAI_LOG_LEVEL", logging.INFO),
format="%(name)s: %(asctime)s | %(levelname)s | %(filename)s:%(lineno)s >>> %(message)s",
)
logger = logging.getLogger(__name__)


# handle startup & shutdown tasks
@asynccontextmanager
async def lifespan(app: FastAPI):
"""Handle startup and shutdown tasks for the FastAPI app."""
# startup
logging.info("Starting to watch for configs")
logger.info("Starting to watch for configs")
asyncio.create_task(get_model_config().watch_and_load_configs())
yield
# shutdown
logging.info("Clearing model configs")
logger.info("Clearing model configs")
asyncio.create_task(get_model_config().clear_all_models())


@@ -47,15 +54,10 @@ async def lifespan(app: FastAPI):

@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request, exc):
logging.error(f"The client sent invalid data!: {exc}")
logger.error(f"The client sent invalid data!: {exc}")
return await request_validation_exception_handler(request, exc)


logger = logging.getLogger(__name__)
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
logger.addHandler(handler)

app.include_router(base_router)
app.include_router(auth.router)
app.include_router(models.router)
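
A note on the per-logger handler setup shown just above (getLogger / StreamHandler / setFormatter / addHandler): once basicConfig runs at import, that block appears to be unnecessary and would likely duplicate output, because basicConfig installs a handler on the root logger and loggers obtained via getLogger(__name__) propagate their records up to it. A small sketch of that propagation, with an illustrative logger name and message:

import logging

# basicConfig attaches a StreamHandler with the chosen format to the root logger.
logging.basicConfig(
    level="INFO",
    format="%(name)s: %(asctime)s | %(levelname)s | %(filename)s:%(lineno)s >>> %(message)s",
)

# This child logger has no handlers of its own; its records propagate to the
# root handler, so no addHandler/setFormatter calls are needed per module.
logger = logging.getLogger("leapfrogai_api.main")
logger.info("handled by the root handler via propagation")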
@@ -22,6 +22,8 @@
)
from openai.types.beta.assistant_tool import FileSearchTool

logger = logging.getLogger(__name__)


class CreateAssistantRequest(BaseModel):
"""Request object for creating an assistant."""
@@ -95,7 +97,7 @@ async def request_checks_and_modifications(self, session: Session):
"""

async def new_vector_store_from_file_ids():
logging.debug("Creating vector store for new assistant")
logger.debug("Creating vector store for new assistant")
indexing_service = IndexingService(db=session)
vector_store_params_dict = vector_stores[0]

@@ -127,7 +129,7 @@ async def new_vector_store_from_file_ids():
self.tool_resources.file_search.vector_store_ids = [vector_store.id]

async def attach_existing_vector_store_from_id():
logging.debug(
logger.debug(
"Attaching vector store with id: {} to new assistant".format(ids[0])
)
crud_vector_store = CRUDVectorStore(db=session)
@@ -189,7 +191,7 @@ async def attach_existing_vector_store_from_id():

# nothing provided, no changes made
else:
logging.debug(
logger.debug(
"No files or vector store id found; assistant will be created with no vector store"
)

@@ -69,6 +69,8 @@
ChatCompletionResponse as ProtobufChatCompletionResponse,
)

logger = logging.getLogger(__name__)


class RunCreateParamsRequestBase(BaseModel):
assistant_id: str = Field(default="", examples=["123ab"])
@@ -101,13 +103,13 @@ def __init__(self, **data):
super().__init__(**data)
# TODO: Temporary fix to ensure max_completion_tokens and max_prompt_tokens are set
if self.max_completion_tokens is None or self.max_completion_tokens < 1:
logging.warning(
logger.warning(
"max_completion_tokens is not set or is less than 1, setting to %s",
DEFAULT_MAX_COMPLETION_TOKENS,
)
self.max_completion_tokens = DEFAULT_MAX_COMPLETION_TOKENS
if self.max_prompt_tokens is None or self.max_prompt_tokens < 1:
logging.warning(
logger.warning(
"max_prompt_tokens is not set or is less than 1, setting to %s",
DEFAULT_MAX_PROMPT_TOKENS,
)
@@ -170,7 +172,7 @@ def can_use_rag(
return self.tool_choice.type == "file_search"
except ValidationError:
traceback.print_exc()
logging.error(
logger.error(
"Cannot use RAG for request, failed to validate tool for thread"
)
return False
@@ -39,6 +39,9 @@
from leapfrogai_api.routers.supabase_session import Session


logger = logging.getLogger(__name__)


class ThreadRunCreateParamsRequest(RunCreateParamsRequestBase):
thread: ThreadCreateAndRunsThread | None = Field(
default=None,
@@ -112,7 +115,7 @@ async def create_thread_request(self) -> CreateThreadRequest:

thread_request.messages.append(new_message)
except ValueError as exc:
logging.error(f"\t{exc}")
logger.error(f"\t{exc}")
continue
return thread_request

4 changes: 3 additions & 1 deletion src/leapfrogai_api/routers/openai/vector_stores.py
@@ -22,6 +22,8 @@
)
from leapfrogai_api.routers.supabase_session import Session

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/openai/v1/vector_stores", tags=["openai/vector_stores"])


@@ -128,7 +130,7 @@ async def create_vector_store_file(
)
return vector_store_file
except Exception as exc:
logging.exception("Error indexing file")
logger.exception("Error indexing file")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to create vector store file",