From 03bf0fd7e42fe64b9c2dc234b9ee9433f62401bc Mon Sep 17 00:00:00 2001
From: Jonathan Perry
Date: Tue, 3 Sep 2024 11:20:11 -0400
Subject: [PATCH] chore: add logger configuration to our various main
 entrypoints (#918)

* fix: remove erroneous logging config setting

* chore: set logging basicConfig in main files

* chore: avoid using root logger

* chore: add ability to set lfai log level based on env var

Signed-off-by: Jon Perry

---------

Signed-off-by: Jon Perry
---
 packages/llama-cpp-python/main.py        |  9 ++++++++
 packages/text-embeddings/main.py         | 18 ++++++++++++++--
 packages/vllm/src/main.py                | 21 ++++++++++++-------
 packages/whisper/main.py                 |  5 ++++-
 src/leapfrogai_api/backend/rag/index.py  | 10 +++++----
 .../backend/rag/leapfrogai_embeddings.py |  4 +++-
 src/leapfrogai_api/main.py               | 18 +++++++++-------
 .../create_modify_assistant_request.py   |  8 ++++---
 .../run_create_params_request_base.py    |  8 ++++---
 .../thread_run_create_params_request.py  |  5 ++++-
 .../routers/openai/vector_stores.py      |  4 +++-
 .../routers/supabase_session.py          | 10 +++++----
 src/leapfrogai_api/utils/config.py       | 21 ++++++++++---------
 src/leapfrogai_sdk/config.py             |  4 +++-
 14 files changed, 98 insertions(+), 47 deletions(-)

diff --git a/packages/llama-cpp-python/main.py b/packages/llama-cpp-python/main.py
index 70b01cc66..dd5d52f02 100644
--- a/packages/llama-cpp-python/main.py
+++ b/packages/llama-cpp-python/main.py
@@ -1,3 +1,4 @@
+import logging
 import os
 from typing import Any, AsyncGenerator
 
@@ -10,6 +11,12 @@
     False if os.environ.get("GPU_ENABLED", "False").lower() != "true" else True
 )
 
+logging.basicConfig(
+    level=os.getenv("LFAI_LOG_LEVEL", logging.INFO),
+    format="%(name)s: %(asctime)s | %(levelname)s | %(filename)s:%(lineno)s >>> %(message)s",
+)
+logger = logging.getLogger(__name__)
+
 
 @LLM
 class Model:
@@ -27,6 +34,7 @@ class Model:
     async def generate(
         self, prompt: str, config: GenerationConfig
     ) -> AsyncGenerator[str, Any]:
+        logger.info("Begin generating streamed response")
         for res in self.llm(
             prompt,
             stream=True,
@@ -37,6 +45,7 @@ async def generate(
             stop=self.backend_config.stop_tokens,
         ):
             yield res["choices"][0]["text"]  # type: ignore
+        logger.info("Streamed response complete")
 
     async def count_tokens(self, raw_text: str) -> int:
         string_bytes: bytes = bytes(raw_text, "utf-8")
diff --git a/packages/text-embeddings/main.py b/packages/text-embeddings/main.py
index 0ad8ce824..1abd2521d 100644
--- a/packages/text-embeddings/main.py
+++ b/packages/text-embeddings/main.py
@@ -11,18 +11,32 @@
     serve,
 )
 
+logging.basicConfig(
+    level=os.getenv("LFAI_LOG_LEVEL", logging.INFO),
+    format="%(name)s: %(asctime)s | %(levelname)s | %(filename)s:%(lineno)s >>> %(message)s",
+)
+logger = logging.getLogger(__name__)
+
+
 model_dir = os.environ.get("LFAI_MODEL_PATH", ".model")
 model = INSTRUCTOR(model_dir)
 
 
 class InstructorEmbedding:
     async def CreateEmbedding(self, request: EmbeddingRequest, context: GrpcContext):
-        embeddings = model.encode(request.inputs)
+        logger.info(
+            f"processing CreateEmbedding request: char-length: {len(str(request.inputs))} word-count: {len(str(request.inputs).split())}"
+        )
+
+        embeddings = model.encode(request.inputs, show_progress_bar=True)
         embeddings = [Embedding(embedding=inner_list) for inner_list in embeddings]
+
+        logger.info(
+            f"finished processing CreateEmbedding request, created {len(embeddings)} embeddings"
+        )
         return EmbeddingResponse(embeddings=embeddings)
 
 
 if __name__ == "__main__":
-    logging.basicConfig(level=logging.INFO)
     asyncio.run(serve(InstructorEmbedding()))
 
diff --git a/packages/vllm/src/main.py b/packages/vllm/src/main.py
index 307cfbc91..6a530e4f0 100644
--- a/packages/vllm/src/main.py
+++ b/packages/vllm/src/main.py
@@ -30,6 +30,12 @@
 
 load_dotenv()
 
+logging.basicConfig(
+    level=os.getenv("LFAI_LOG_LEVEL", logging.INFO),
+    format="%(name)s: %(asctime)s | %(levelname)s | %(filename)s:%(lineno)s >>> %(message)s",
+)
+logger = logging.getLogger(__name__)
+
 
 def clamp(n: float | int, smallest: float | int, largest: float | int):
     return max(smallest, min(n, largest))
@@ -142,8 +148,6 @@ class Model:
     random_iterator: RandomAsyncIterator = RandomAsyncIterator([])
 
     def __init__(self):
-        logging.getLogger().setLevel(logging.DEBUG)
-
        # Background thread for managing output iteration
        _thread = threading.Thread(target=asyncio.run, args=(self.iterate_outputs(),))
        _thread.start()
@@ -184,7 +188,7 @@ async def iterate_outputs(self):
                 and request_output.finished
             ):
                 # Signal that the "generate" function can stop waiting for additional inputs
-                logging.info(
+                logger.info(
                     f"Generated {num_tokens_by_id[request_id]} tokens in {time.time() - t0_by_id[request_id]:.2f}s"
                 )
                 self.done_by_id[request_id] = True
@@ -234,12 +238,13 @@ async def create_response(
             max_tokens=config.max_new_tokens,
             skip_special_tokens=False,
         )
-        logging.debug(sampling_params)
-        logging.info(f"Begin generation for request {request_id}")
+        logger.info(f"Begin generation for request {request_id}")
+        logger.debug(f"{request_id} sampling_params: {sampling_params}")
+
         # Generate texts from the prompts. The output is a list of RequestOutput objects
         # that contain the prompt, generated text, and other information.
         gen_iter = self.engine.generate(prompt, sampling_params, request_id)
-        logging.info(f"Begin iteration for request {request_id}")
+        logger.info(f"Begin iteration for request {request_id}")
         self.random_iterator.add_iterator(gen_iter)
 
     async def generate_session(
@@ -273,7 +278,7 @@ async def generate(
         )
         _thread.start()
 
-        logging.info(f"Begin reading the output for request {request_id}")
+        logger.info(f"Begin reading the output for request {request_id}")
 
         while not self.done_by_id.get(request_id) or not self.is_queue_empty(
             request_id
@@ -283,7 +288,7 @@ async def generate(
                 result = self.delta_queue_by_id.get(request_id).get()
                 yield result
 
-        logging.info(f"Finished request {request_id}")
+        logger.info(f"Finished request {request_id}")
 
     async def count_tokens(self, raw_text: str) -> int:
         tokens: list[int] | list[str] = (await self.engine.get_tokenizer()).tokenize(
diff --git a/packages/whisper/main.py b/packages/whisper/main.py
index 467a7039a..e3d356ac9 100644
--- a/packages/whisper/main.py
+++ b/packages/whisper/main.py
@@ -7,6 +7,10 @@
 import leapfrogai_sdk as lfai
 from faster_whisper import WhisperModel
 
+logging.basicConfig(
+    level=os.getenv("LFAI_LOG_LEVEL", logging.INFO),
+    format="%(name)s: %(asctime)s | %(levelname)s | %(filename)s:%(lineno)s >>> %(message)s",
+)
 logger = logging.getLogger(__name__)
 
 model_path = os.environ.get("LFAI_MODEL_PATH", ".model")
@@ -107,7 +111,6 @@ def Name(self, request, context):
 
 
 async def main():
-    logging.basicConfig(level=logging.INFO)
     logger.info(f"GPU_ENABLED = {GPU_ENABLED}")
     await lfai.serve(Whisper())
 
diff --git a/src/leapfrogai_api/backend/rag/index.py b/src/leapfrogai_api/backend/rag/index.py
index ce4ade400..1fc0d7311 100644
--- a/src/leapfrogai_api/backend/rag/index.py
+++ b/src/leapfrogai_api/backend/rag/index.py
@@ -28,6 +28,8 @@
 from leapfrogai_api.data.crud_vector_content import CRUDVectorContent, Vector
 
 
+logger = logging.getLogger(__name__)
+
 # Allows for overwriting type of embeddings that will be instantiated
 embeddings_type: type[Embeddings] | type[LeapfrogAIEmbeddings] | None = (
     LeapfrogAIEmbeddings
@@ -56,13 +58,13 @@ async def index_file(self, vector_store_id: str, file_id: str) -> VectorStoreFil
         if await crud_vector_store_file.get(
             filters=FilterVectorStoreFile(vector_store_id=vector_store_id, id=file_id)
         ):
-            logging.error("File already indexed: %s", file_id)
+            logger.error("File already indexed: %s", file_id)
             raise FileAlreadyIndexedError("File already indexed")
 
         if not (
             await crud_vector_store.get(filters=FilterVectorStore(id=vector_store_id))
         ):
-            logging.error("Vector store doesn't exist: %s", vector_store_id)
+            logger.error("Vector store doesn't exist: %s", vector_store_id)
             raise ValueError("Vector store not found")
 
         crud_file_object = CRUDFileObject(db=self.db)
@@ -146,7 +148,7 @@ async def index_files(
                 )
                 responses.append(response)
             except FileAlreadyIndexedError:
-                logging.info("File %s already exists and cannot be re-indexed", file_id)
+                logger.info("File %s already exists and cannot be re-indexed", file_id)
                 continue
             except Exception as exc:
                 raise exc
@@ -267,7 +269,7 @@ async def modify_existing_vector_store(
                 object_=new_vector_store,
             )
         except Exception as exc:
-            logging.error(exc)
+            logger.error(exc)
             raise HTTPException(
                 status_code=status.HTTP_400_BAD_REQUEST,
                 detail="Unable to parse vector store request",
diff --git a/src/leapfrogai_api/backend/rag/leapfrogai_embeddings.py b/src/leapfrogai_api/backend/rag/leapfrogai_embeddings.py
index 0731a8e74..8274a5861 100644
--- a/src/leapfrogai_api/backend/rag/leapfrogai_embeddings.py
+++ b/src/leapfrogai_api/backend/rag/leapfrogai_embeddings.py
@@ -6,6 +6,8 @@
 from leapfrogai_api.backend.grpc_client import create_embeddings
 import logging
 
+logger = logging.getLogger(__name__)
+
 
 # Partially implements the Langchain Core Embeddings interface
 class LeapfrogAIEmbeddings:
@@ -61,7 +63,7 @@ async def _get_model(
         """
 
         if not (model := get_model_config().get_model_backend(model=model_name)):
-            logging.error(f"Embeddings model {model_name} not found.")
+            logger.error(f"Embeddings model {model_name} not found.")
             raise ValueError("Embeddings model not found.")
 
         return model
diff --git a/src/leapfrogai_api/main.py b/src/leapfrogai_api/main.py
index fa0a1e056..c3c806dbd 100644
--- a/src/leapfrogai_api/main.py
+++ b/src/leapfrogai_api/main.py
@@ -2,6 +2,7 @@
 
 import asyncio
 import logging
+import os
 from contextlib import asynccontextmanager
 
 from fastapi import FastAPI
@@ -28,17 +29,23 @@
 )
 from leapfrogai_api.utils import get_model_config
 
+logging.basicConfig(
+    level=os.getenv("LFAI_LOG_LEVEL", logging.INFO),
+    format="%(name)s: %(asctime)s | %(levelname)s | %(filename)s:%(lineno)s >>> %(message)s",
+)
+logger = logging.getLogger(__name__)
+
 
 # handle startup & shutdown tasks
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     """Handle startup and shutdown tasks for the FastAPI app."""
     # startup
-    logging.info("Starting to watch for configs")
+    logger.info("Starting to watch for configs")
     asyncio.create_task(get_model_config().watch_and_load_configs())
     yield
     # shutdown
-    logging.info("Clearing model configs")
+    logger.info("Clearing model configs")
     asyncio.create_task(get_model_config().clear_all_models())
 
 
@@ -47,15 +54,10 @@ async def lifespan(app: FastAPI):
 
 @app.exception_handler(RequestValidationError)
 async def validation_exception_handler(request, exc):
-    logging.error(f"The client sent invalid data!: {exc}")
+    logger.error(f"The client sent invalid data!: {exc}")
     return await request_validation_exception_handler(request, exc)
 
 
-logger = logging.getLogger(__name__)
-handler = logging.StreamHandler()
-handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
-logger.addHandler(handler)
-
 app.include_router(base_router)
 app.include_router(auth.router)
 app.include_router(models.router)
diff --git a/src/leapfrogai_api/routers/openai/requests/create_modify_assistant_request.py b/src/leapfrogai_api/routers/openai/requests/create_modify_assistant_request.py
index 28a10b033..8a0791300 100644
--- a/src/leapfrogai_api/routers/openai/requests/create_modify_assistant_request.py
+++ b/src/leapfrogai_api/routers/openai/requests/create_modify_assistant_request.py
@@ -22,6 +22,8 @@
 )
 from openai.types.beta.assistant_tool import FileSearchTool
 
+logger = logging.getLogger(__name__)
+
 
 class CreateAssistantRequest(BaseModel):
     """Request object for creating an assistant."""
@@ -95,7 +97,7 @@ async def request_checks_and_modifications(self, session: Session):
         """
 
         async def new_vector_store_from_file_ids():
-            logging.debug("Creating vector store for new assistant")
+            logger.debug("Creating vector store for new assistant")
             indexing_service = IndexingService(db=session)
 
             vector_store_params_dict = vector_stores[0]
@@ -127,7 +129,7 @@ async def new_vector_store_from_file_ids():
             self.tool_resources.file_search.vector_store_ids = [vector_store.id]
 
         async def attach_existing_vector_store_from_id():
-            logging.debug(
+            logger.debug(
                 "Attaching vector store with id: {} to new assistant".format(ids[0])
             )
             crud_vector_store = CRUDVectorStore(db=session)
@@ -189,7 +191,7 @@ async def attach_existing_vector_store_from_id():
 
         # nothing provided, no changes made
         else:
-            logging.debug(
+            logger.debug(
                 "No files or vector store id found; assistant will be created with no vector store"
             )
 
diff --git a/src/leapfrogai_api/routers/openai/requests/run_create_params_request_base.py b/src/leapfrogai_api/routers/openai/requests/run_create_params_request_base.py
index 3f9834340..0770178f6 100644
--- a/src/leapfrogai_api/routers/openai/requests/run_create_params_request_base.py
+++ b/src/leapfrogai_api/routers/openai/requests/run_create_params_request_base.py
@@ -69,6 +69,8 @@
     ChatCompletionResponse as ProtobufChatCompletionResponse,
 )
 
+logger = logging.getLogger(__name__)
+
 
 class RunCreateParamsRequestBase(BaseModel):
     assistant_id: str = Field(default="", examples=["123ab"])
@@ -101,13 +103,13 @@ def __init__(self, **data):
         super().__init__(**data)
         # TODO: Temporary fix to ensure max_completion_tokens and max_prompt_tokens are set
         if self.max_completion_tokens is None or self.max_completion_tokens < 1:
-            logging.warning(
+            logger.warning(
                 "max_completion_tokens is not set or is less than 1, setting to %s",
                 DEFAULT_MAX_COMPLETION_TOKENS,
             )
             self.max_completion_tokens = DEFAULT_MAX_COMPLETION_TOKENS
         if self.max_prompt_tokens is None or self.max_prompt_tokens < 1:
-            logging.warning(
+            logger.warning(
                 "max_prompt_tokens is not set or is less than 1, setting to %s",
                 DEFAULT_MAX_PROMPT_TOKENS,
             )
@@ -170,7 +172,7 @@ def can_use_rag(
                 return self.tool_choice.type == "file_search"
         except ValidationError:
             traceback.print_exc()
-            logging.error(
+            logger.error(
                 "Cannot use RAG for request, failed to validate tool for thread"
             )
             return False
diff --git a/src/leapfrogai_api/routers/openai/requests/thread_run_create_params_request.py b/src/leapfrogai_api/routers/openai/requests/thread_run_create_params_request.py
index 0eaf6460c..eb38b8406 100644
--- a/src/leapfrogai_api/routers/openai/requests/thread_run_create_params_request.py
+++ b/src/leapfrogai_api/routers/openai/requests/thread_run_create_params_request.py
@@ -39,6 +39,9 @@
 
 from leapfrogai_api.routers.supabase_session import Session
 
+logger = logging.getLogger(__name__)
+
+
 class ThreadRunCreateParamsRequest(RunCreateParamsRequestBase):
     thread: ThreadCreateAndRunsThread | None = Field(
         default=None,
@@ -112,7 +115,7 @@ async def create_thread_request(self) -> CreateThreadRequest:
 
                     thread_request.messages.append(new_message)
                 except ValueError as exc:
-                    logging.error(f"\t{exc}")
+                    logger.error(f"\t{exc}")
                     continue
 
         return thread_request
diff --git a/src/leapfrogai_api/routers/openai/vector_stores.py b/src/leapfrogai_api/routers/openai/vector_stores.py
index 765d07429..0384e7928 100644
--- a/src/leapfrogai_api/routers/openai/vector_stores.py
+++ b/src/leapfrogai_api/routers/openai/vector_stores.py
@@ -22,6 +22,8 @@
 )
 from leapfrogai_api.routers.supabase_session import Session
 
+logger = logging.getLogger(__name__)
+
 router = APIRouter(prefix="/openai/v1/vector_stores", tags=["openai/vector_stores"])
 
 
@@ -128,7 +130,7 @@ async def create_vector_store_file(
         )
         return vector_store_file
     except Exception as exc:
-        logging.exception("Error indexing file")
+        logger.exception("Error indexing file")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
             detail="Failed to create vector store file",
diff --git a/src/leapfrogai_api/routers/supabase_session.py b/src/leapfrogai_api/routers/supabase_session.py
index f442060f3..400974066 100644
--- a/src/leapfrogai_api/routers/supabase_session.py
+++ b/src/leapfrogai_api/routers/supabase_session.py
@@ -12,6 +12,8 @@
 import gotrue
 from leapfrogai_api.backend.security.api_key import APIKey
 
+logger = logging.getLogger(__name__)
+
 security = HTTPBearer()
 
 
@@ -61,19 +63,19 @@ async def init_supabase_client(
             access_token=auth_creds.credentials, refresh_token="dummy"
         )
     except gotrue.errors.AuthApiError as e:
-        logging.exception("\t%s", e)
+        logger.exception("\t%s", e)
         raise HTTPException(
             detail="Token has expired or is not valid. Generate a new token",
             status_code=status.HTTP_401_UNAUTHORIZED,
         ) from e
     except binascii.Error as e:
-        logging.exception("\t%s", e)
+        logger.exception("\t%s", e)
         raise HTTPException(
             detail="Failed to validate Authentication Token",
             status_code=status.HTTP_401_UNAUTHORIZED,
         ) from e
     except Exception as e:
-        logging.exception("\t%s", e)
+        logger.exception("\t%s", e)
         raise HTTPException(
             detail="Failed to create Supabase session",
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
@@ -97,7 +99,7 @@ async def init_supabase_client(
 
         return client
     except ValueError as e:
-        logging.exception("\t%s", e)
+        logger.exception("\t%s", e)
         raise HTTPException(
             detail="Failed to validate API Key",
             status_code=status.HTTP_401_UNAUTHORIZED,
diff --git a/src/leapfrogai_api/utils/config.py b/src/leapfrogai_api/utils/config.py
index 9bb1a00a7..60edfb3e8 100644
--- a/src/leapfrogai_api/utils/config.py
+++ b/src/leapfrogai_api/utils/config.py
@@ -8,7 +8,8 @@
 import yaml
 from watchfiles import Change, awatch
 
-logging.basicConfig(level=logging.INFO)
+
+logger = logging.getLogger(__name__)
 
 
 class Model:
@@ -50,7 +51,7 @@ async def watch_and_load_configs(self, directory=".", filename="config.yaml"):
         async for changes in awatch(directory, recursive=False, step=50):
             # get two unique lists of files that have been (updated files and deleted files)
             # (awatch can return duplicates depending on the type of updates that happen)
-            logging.info("Config changes detected: {}".format(changes))
+            logger.info("Config changes detected: {}".format(changes))
             unique_new_files = set()
             unique_deleted_files = set()
             for change in changes:
@@ -77,10 +78,10 @@ async def clear_all_models(self):
         # reset the model config on shutdown (so old model configs don't get cached)
         self.models = {}
         self.config_sources = {}
-        logging.info("All models have been removed")
+        logger.info("All models have been removed")
 
     def load_config_file(self, directory: str, config_file: str):
-        logging.info("Loading config file: {}/{}".format(directory, config_file))
+        logger.info("Loading config file: {}/{}".format(directory, config_file))
 
         # load the config file into the config object
         config_path = os.path.join(directory, config_file)
@@ -93,25 +94,25 @@ def load_config_file(self, directory: str, config_file: str):
                 loaded_artifact = yaml.safe_load(c)
             else:
                 # TODO: Return an error ???
-                logging.error(f"Unsupported file type: {config_path}")
+                logger.error(f"Unsupported file type: {config_path}")
                 return
 
         # parse the object into our config
         self.parse_models(loaded_artifact, config_file)
 
-        logging.info("loaded artifact at {}".format(config_path))
+        logger.info("loaded artifact at {}".format(config_path))
 
         return
 
     def load_all_configs(self, directory="", filename="config.yaml"):
-        logging.info(
+        logger.info(
             "Loading all configs in {} that match the name '{}'".format(
                 directory, filename
             )
         )
 
         if not os.path.exists(directory):
-            logging.error("The config directory ({}) does not exist".format(directory))
+            logger.error("The config directory ({}) does not exist".format(directory))
             return "THE CONFIG DIRECTORY DOES NOT EXIST"
 
         # Get all config files and load them into the config object
@@ -137,12 +138,12 @@ def parse_models(self, loaded_artifact, config_file):
                     self.config_sources[config_file].append(m["name"])
                 except KeyError:
                     self.config_sources[config_file] = [m["name"]]
-            logging.info("added {} to model config".format(m["name"]))
+            logger.info("added {} to model config".format(m["name"]))
 
     def remove_model_by_config(self, config_file):
         for model_name in self.config_sources[config_file]:
             self.models.pop(model_name)
-            logging.info("removed {} from model config".format(model_name))
+            logger.info("removed {} from model config".format(model_name))
 
         # clear config once all corresponding models are deleted
         self.config_sources.pop(config_file)
diff --git a/src/leapfrogai_sdk/config.py b/src/leapfrogai_sdk/config.py
index 4932bef17..70b81bfcc 100644
--- a/src/leapfrogai_sdk/config.py
+++ b/src/leapfrogai_sdk/config.py
@@ -6,6 +6,8 @@
 
 from leapfrogai_sdk import ChatItem, ChatRole
 
+logger = logging.getLogger(__name__)
+
 
 class LLMDefaults(BaseConfig):
     temperature: float = 0.5
@@ -58,7 +60,7 @@ def apply_chat_template(
             elif item.role == ChatRole.USER:
                 prompt += self.prompt_format.chat.user.format(item.content)
             elif item.role == ChatRole.FUNCTION:
-                logging.warning(
+                logger.warning(
                     "ChatRole FUNCTION is not implemented for this model and this ChatItem will be ignored."
                 )
         # add the response prefix to start the model's reponse
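
A note on the pattern introduced above: logging.basicConfig() accepts either a numeric level or an upper-case level name, so the new LFAI_LOG_LEVEL environment variable (for example DEBUG or WARNING) changes a service's verbosity at startup without code changes, and the numeric logging.INFO default applies when the variable is unset. The snippet below is a minimal standalone sketch of that same pattern, for illustration only; the module, messages, and invocation shown are not part of the patch.

    import logging
    import os

    # Both a level name string from the environment ("DEBUG", "WARNING", ...)
    # and the numeric logging.INFO fallback are accepted by basicConfig().
    logging.basicConfig(
        level=os.getenv("LFAI_LOG_LEVEL", logging.INFO),
        format="%(name)s: %(asctime)s | %(levelname)s | %(filename)s:%(lineno)s >>> %(message)s",
    )
    logger = logging.getLogger(__name__)

    logger.debug("emitted only when LFAI_LOG_LEVEL=DEBUG")
    logger.info("emitted at the default INFO level")

Running an entrypoint with the variable set (for example, LFAI_LOG_LEVEL=DEBUG python main.py, invocation illustrative) would then surface the per-request debug lines added in packages/vllm/src/main.py.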