feature -- LiteLLM integration for Ecologits #61

Merged: 8 commits, Jul 17, 2024
3 changes: 2 additions & 1 deletion docs/index.md
@@ -32,12 +32,13 @@ For integration with specific providers like OpenAI, additional dependencies can
pip install ecologits[openai]
```

EcoLogits currently supports the following providers:
EcoLogits currently supports the following providers/clients:

- `anthropic`
- `cohere`
- `google-generativeai`
- `huggingface-hub` (Hugging Face Inference Endpoints)
- `litellm`
- `mistralai`
- `openai`

2 changes: 2 additions & 0 deletions docs/providers.md
@@ -8,6 +8,7 @@
| Cohere | `cohere` | [Guide for Cohere :octicons-link-16:](tutorial/providers/cohere.md) |
| Google Gemini | `google-generativeai` | [Guide for Google Gemini :octicons-link-16:](tutorial/providers/google.md) |
| Hugging Face Hub | `huggingface-hub` | [Guide for Hugging Face Hub :octicons-link-16:](tutorial/providers/huggingface_hub.md) |
| LiteLLM | `litellm` | [Guide for LiteLLM :octicons-link-16:](tutorial/providers/litellm.md) |
| Mistral AI | `mistralai` | [Guide for Mistral AI :octicons-link-16:](tutorial/providers/mistralai.md) |
| OpenAI | `openai` | [Guide for OpenAI :octicons-link-16:](tutorial/providers/openai.md) |

@@ -20,6 +21,7 @@
| Cohere | :material-checkbox-marked-circle: | :material-checkbox-marked-circle: | :material-checkbox-marked-circle: | :material-checkbox-marked-circle: |
| Google Gemini | :material-checkbox-marked-circle: | :material-checkbox-marked-circle: | :material-checkbox-marked-circle: | :material-checkbox-marked-circle: |
| HuggingFace Hub | :material-checkbox-marked-circle: | :material-checkbox-marked-circle: | :material-checkbox-marked-circle: | :material-checkbox-marked-circle: |
| LiteLLM | :material-checkbox-marked-circle: | :material-checkbox-marked-circle: | :material-checkbox-marked-circle: | :material-checkbox-marked-circle: |
| Mistral AI | :material-checkbox-marked-circle: | :material-checkbox-marked-circle: | :material-checkbox-marked-circle: | :material-checkbox-marked-circle: |
| OpenAI | :material-checkbox-marked-circle: | :material-checkbox-marked-circle: | :material-checkbox-marked-circle: | :material-checkbox-marked-circle: |

118 changes: 118 additions & 0 deletions docs/tutorial/providers/litellm.md
@@ -0,0 +1,118 @@
# LiteLLM

This guide focuses on the integration of :seedling: **EcoLogits** with the [official LiteLLM Python client :octicons-link-external-16:](https://github.com/BerriAI/litellm).

Official links:

* Repository: [:simple-github: BerriAI/litellm](https://github.com/BerriAI/litellm)
* Documentation: [:material-file-document: litellm.vercel.app](https://litellm.vercel.app/docs/#litellm-python-sdk)


## Installation

To install EcoLogits along with all necessary dependencies for compatibility with LiteLLM, please use the `litellm` extra-dependency option as follows:

```shell
pip install ecologits[litellm]
```

This installation command ensures that EcoLogits is set up with the specific libraries required to interface seamlessly with LiteLLM's Python client.
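As a quick sanity check after installation (a hypothetical snippet, not from the official docs), you can verify that both packages are importable in your environment:

```python
import importlib.util

# Both should print True if `pip install ecologits[litellm]` succeeded
print(importlib.util.find_spec("ecologits") is not None)
print(importlib.util.find_spec("litellm") is not None)
```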


## Chat Completions

### Example

Integrating EcoLogits with your applications does not alter the standard outputs from the API responses. Instead, it enriches them by adding the `Impacts` object, which contains detailed environmental impact data. **Make sure the API key of the provider you are calling is available, for example in a `.env` file.** Also call the generation function as `litellm.completion`, not as a bare `completion` import, so that the instrumented module-level function is used.
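For example, if the call below goes through OpenAI, the key can be loaded from a `.env` file before the request is made. A minimal sketch, assuming the `python-dotenv` package and the conventional `OPENAI_API_KEY` variable name:

```python
import os

from dotenv import load_dotenv  # assumes python-dotenv is installed

# Read OPENAI_API_KEY (or the key of whichever provider you call) from a local .env file
load_dotenv()

assert os.getenv("OPENAI_API_KEY"), "Set OPENAI_API_KEY in your .env file"
```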

=== "Sync"

```python
from ecologits import EcoLogits
import litellm

# Initialize EcoLogits
EcoLogits.init()

response = litellm.completion(
    model="gpt-4o-2024-05-13",
    messages=[{"role": "user", "content": "Hello, how are you?"}]
)

# Get estimated environmental impacts of the inference
print(response.impacts)
```

=== "Async"

```python
import asyncio
import litellm
from ecologits import EcoLogits

# Initialize EcoLogits
EcoLogits.init()

async def main() -> None:
    response = await litellm.acompletion(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": "Tell me a funny joke!"}
        ]
    )

    # Get estimated environmental impacts of the inference
    print(response.impacts)


asyncio.run(main())
```

### Streaming example

**In streaming mode, the impacts are calculated incrementally**, which means you don't need to sum the impacts from each data chunk. Instead, the impact information in the last chunk reflects the total cumulative environmental impacts for the entire request.

=== "Sync"

```python
from ecologits import EcoLogits
import litellm

# Initialize EcoLogits
EcoLogits.init()

stream = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello World!"}],
    stream=True
)

for chunk in stream:
    # Get cumulative estimated environmental impacts of the inference
    print(chunk.impacts)
```

=== "Async"

```python
import asyncio
import litellm
from ecologits import EcoLogits

# Initialize EcoLogits
EcoLogits.init()

async def main() -> None:
    stream = await litellm.acompletion(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": "Tell me a funny joke!"}
        ],
        stream=True
    )

    async for chunk in stream:
        # Get cumulative estimated environmental impacts of the inference
        print(chunk.impacts)

asyncio.run(main())
```
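Because the impacts reported in streaming mode are cumulative, a common pattern is to keep only the last chunk and read its totals once the stream is exhausted. A minimal sketch; the `energy` and `gwp` attribute names are assumed from the EcoLogits `Impacts` object and may need adapting:

```python
from ecologits import EcoLogits
import litellm

# Initialize EcoLogits
EcoLogits.init()

last_chunk = None
for chunk in litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello World!"}],
    stream=True,
):
    last_chunk = chunk  # impacts are cumulative, so the last chunk holds the totals

if last_chunk is not None and hasattr(last_chunk, "impacts"):
    # Assumed attribute names on the Impacts object: energy and gwp
    print(last_chunk.impacts.energy)
    print(last_chunk.impacts.gwp)
```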
8 changes: 8 additions & 0 deletions ecologits/ecologits.py
@@ -50,6 +50,7 @@ def init_instruments() -> None:
    init_huggingface_instrumentor()
    init_cohere_instrumentor()
    init_google_instrumentor()
    init_litellm_instrumentor()


def init_openai_instrumentor() -> None:
@@ -101,3 +102,10 @@ def init_google_instrumentor() -> None:

        instrumentor = GoogleInstrumentor()
        instrumentor.instrument()

def init_litellm_instrumentor() -> None:
    if importlib.util.find_spec("litellm") is not None:
        from ecologits.tracers.litellm_tracer import LiteLLMInstrumentor

        instrumentor = LiteLLMInstrumentor()
        instrumentor.instrument()
9 changes: 8 additions & 1 deletion ecologits/model_repository.py
@@ -37,10 +37,17 @@ def __init__(self, models: list[Model]) -> None:

    def find_model(self, provider: str, model_name: str) -> Optional[Model]:
        for model in self.__models:
            if model.provider == provider and model_name == model.name:
            # To handle specific LiteLLM calling (e.g., mistral/mistral-small)
            if model.provider == provider and model.name in model_name:
                return model
        return None

    def find_provider(self, model_name: str) -> Optional[str]:
        for model in self.__models:
            if model.name in model_name:
                return model.provider
        return None

    @classmethod
    def from_csv(cls, filepath: Optional[str] = None) -> "ModelRepository":
        if filepath is None:
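As an illustration of the substring matching in `find_model` and `find_provider` above, LiteLLM often passes provider-prefixed model strings such as `mistral/mistral-small`, which still match the plain model name stored in the repository. A hypothetical usage sketch, not part of the diff:

```python
# Hypothetical illustration of the matching logic above (not part of the PR).
from ecologits.model_repository import models

model_name = "mistral/mistral-small"  # LiteLLM-style "provider/model" string
provider = models.find_provider(model_name=model_name)
model = models.find_model(provider=provider, model_name=model_name)

print(provider)                       # provider registered for "mistral-small"
print(model.name if model else None)  # "mistral-small" if found
```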
170 changes: 170 additions & 0 deletions ecologits/tracers/litellm_tracer.py
@@ -0,0 +1,170 @@
import time
from typing import Any, Callable, Union

from wrapt import wrap_function_wrapper

from ecologits.impacts import Impacts
from ecologits.model_repository import models
from ecologits.tracers.utils import llm_impacts

try:
    import litellm
    from litellm import AsyncCompletions, Completions
    from litellm.types.utils import ModelResponse
    from litellm.utils import CustomStreamWrapper

except ImportError:
    ModelResponse = object()
    CustomStreamWrapper = object()
    Completions = object()
    AsyncCompletions = object()


class ChatCompletion(ModelResponse):
    impacts: Impacts


class ChatCompletionChunk(ModelResponse):
    impacts: Impacts


def litellm_chat_wrapper(
    wrapped: Callable,
    instance: Completions,
    args: Any,
    kwargs: Any
) -> Union[ChatCompletion, CustomStreamWrapper]:
    if kwargs.get("stream", False):
        return litellm_chat_wrapper_stream(wrapped, instance, args, kwargs)
    else:
        return litellm_chat_wrapper_non_stream(wrapped, instance, args, kwargs)


def litellm_chat_wrapper_stream(
    wrapped: Callable,
    instance: Completions,  # noqa: ARG001
    args: Any,
    kwargs: Any
) -> CustomStreamWrapper:
    timer_start = time.perf_counter()
    stream = wrapped(*args, **kwargs)
    token_count = 0
    for i, chunk in enumerate(stream):
        if i > 0 and chunk.choices[0].finish_reason is None:
            token_count += 1
        request_latency = time.perf_counter() - timer_start
        model_name = chunk.model
        impacts = llm_impacts(
            provider=models.find_provider(model_name=model_name),
            model_name=model_name,
            output_token_count=token_count,
            request_latency=request_latency,
        )
        if impacts is not None:
            yield ChatCompletionChunk(**chunk.model_dump(), impacts=impacts)
        else:
            yield chunk

def litellm_chat_wrapper_non_stream(
    wrapped: Callable,
    instance: Completions,  # noqa: ARG001
    args: Any,
    kwargs: Any
) -> ChatCompletion:
    timer_start = time.perf_counter()
    response = wrapped(*args, **kwargs)
    request_latency = time.perf_counter() - timer_start
    model_name = response.model
    impacts = llm_impacts(
        provider=models.find_provider(model_name=model_name),
        model_name=model_name,
        output_token_count=response.usage.completion_tokens,
        request_latency=request_latency,
    )
    if impacts is not None:
        return ChatCompletion(**response.model_dump(), impacts=impacts)
    else:
        return response

async def litellm_async_chat_wrapper(
    wrapped: Callable,
    instance: AsyncCompletions,
    args: Any,
    kwargs: Any
) -> Union[ChatCompletion, CustomStreamWrapper]:
    if kwargs.get("stream", False):
        return litellm_async_chat_wrapper_stream(wrapped, instance, args, kwargs)
    else:
        return await litellm_async_chat_wrapper_base(wrapped, instance, args, kwargs)


async def litellm_async_chat_wrapper_base(
    wrapped: Callable,
    instance: AsyncCompletions,  # noqa: ARG001
    args: Any,
    kwargs: Any
) -> ChatCompletion:
    timer_start = time.perf_counter()
    response = await wrapped(*args, **kwargs)
    request_latency = time.perf_counter() - timer_start
    model_name = response.model
    impacts = llm_impacts(
        provider=models.find_provider(model_name=model_name),
        model_name=model_name,
        output_token_count=response.usage.completion_tokens,
        request_latency=request_latency,
    )
    if impacts is not None:
        return ChatCompletion(**response.model_dump(), impacts=impacts)
    else:
        return response


async def litellm_async_chat_wrapper_stream(
    wrapped: Callable,
    instance: AsyncCompletions,  # noqa: ARG001
    args: Any,
    kwargs: Any
) -> CustomStreamWrapper:
    timer_start = time.perf_counter()
    stream = await wrapped(*args, **kwargs)
    i = 0
    token_count = 0
    async for chunk in stream:
        if i > 0 and chunk.choices[0].finish_reason is None:
            token_count += 1
        request_latency = time.perf_counter() - timer_start
        model_name = chunk.model
        impacts = llm_impacts(
            provider=models.find_provider(model_name=model_name),
            model_name=model_name,
            output_token_count=token_count,
            request_latency=request_latency,
        )
        if impacts is not None:
            yield ChatCompletionChunk(**chunk.model_dump(), impacts=impacts)
        else:
            yield chunk
        i += 1


class LiteLLMInstrumentor:
    def __init__(self) -> None:
        self.wrapped_methods = [
            {
                "module": litellm,
                "name": "completion",
                "wrapper": litellm_chat_wrapper,
            },
            {
                "module": litellm,
                "name": "acompletion",
                "wrapper": litellm_async_chat_wrapper,
            },
        ]

    def instrument(self) -> None:
        for wrapper in self.wrapped_methods:
            wrap_function_wrapper(
                wrapper["module"], wrapper["name"], wrapper["wrapper"]
            )
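For reference, the `instrument()` call above is roughly equivalent to the following sketch: it monkey-patches the module-level `litellm.completion` and `litellm.acompletion` functions via `wrapt`, which is also why the tutorial asks you to call `litellm.completion` rather than a bare `completion` import.

```python
# Rough equivalent of LiteLLMInstrumentor.instrument() (illustrative sketch only).
import litellm
from wrapt import wrap_function_wrapper

from ecologits.tracers.litellm_tracer import (
    litellm_async_chat_wrapper,
    litellm_chat_wrapper,
)

# After these two calls, litellm.completion / litellm.acompletion are wrapped,
# so every response they return is enriched with an `impacts` attribute.
wrap_function_wrapper(litellm, "completion", litellm_chat_wrapper)
wrap_function_wrapper(litellm, "acompletion", litellm_async_chat_wrapper)
```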
1 change: 1 addition & 0 deletions mkdocs.yml
@@ -18,6 +18,7 @@ nav:
- 'Cohere': tutorial/providers/cohere.md
- 'Google Gemini': tutorial/providers/google.md
- 'Hugging Face Hub': tutorial/providers/huggingface_hub.md
- 'LiteLLM': tutorial/providers/litellm.md
- 'Mistral AI': tutorial/providers/mistralai.md
- 'OpenAI': tutorial/providers/openai.md
- 'Methodology':