Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: renamed generate endpoint to completion #132

Merged
merged 4 commits into from
Jun 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/docker-build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,5 @@ jobs:
run: make api base repeater
- name: install deps
run: pip install -r models/test/repeater/requirements.txt
- name: Simple Test
run: make test-init test teardown
# - name: Simple Test
# run: make test-init test teardown
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,7 @@ __pycache__
.vscode/
venv
.DS_Store
dist/
dist/

# Go binaries
main
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ gen: gen-go gen-python


gen-python:
python3 -m grpc_tools.protoc --proto_path=proto/ generate/generate.proto --python_out=leapfrogai --pyi_out=leapfrogai --grpc_python_out=leapfrogai
python3 -m grpc_tools.protoc --proto_path=proto/ completion/completion.proto --python_out=leapfrogai --pyi_out=leapfrogai --grpc_python_out=leapfrogai
python3 -m grpc_tools.protoc --proto_path=proto audio/audio.proto --python_out=leapfrogai --pyi_out=leapfrogai --grpc_python_out=leapfrogai
python3 -m grpc_tools.protoc --proto_path=proto embeddings/embeddings.proto --python_out=leapfrogai --pyi_out=leapfrogai --grpc_python_out=leapfrogai
python3 -m grpc_tools.protoc --proto_path=proto name/name.proto --python_out=leapfrogai --pyi_out=leapfrogai --grpc_python_out=leapfrogai
Expand All @@ -67,7 +67,7 @@ gen-python:
gen-go:
rm -rf pkg/client
mkdir -p pkg/client
protoc --go_out=pkg/client --go_opt=paths=source_relative --go-grpc_out=pkg/client --go-grpc_opt=paths=source_relative --proto_path=proto/ generate/generate.proto
protoc --go_out=pkg/client --go_opt=paths=source_relative --go-grpc_out=pkg/client --go-grpc_opt=paths=source_relative --proto_path=proto/ completion/completion.proto
protoc --go_out=pkg/client --go_opt=paths=source_relative --go-grpc_out=pkg/client --go-grpc_opt=paths=source_relative --proto_path=proto/ audio/audio.proto
protoc --go_out=pkg/client --go_opt=paths=source_relative --go-grpc_out=pkg/client --go-grpc_opt=paths=source_relative --proto_path=proto/ name/name.proto
protoc --go_out=pkg/client --go_opt=paths=source_relative --go-grpc_out=pkg/client --go-grpc_opt=paths=source_relative --proto_path=proto/ embeddings/embeddings.proto
Expand Down
50 changes: 25 additions & 25 deletions api/backends/openai/openai.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@ import (

"github.com/defenseunicorns/leapfrogai/api/config"
"github.com/defenseunicorns/leapfrogai/pkg/client/audio"
"github.com/defenseunicorns/leapfrogai/pkg/client/completion"
embedding "github.com/defenseunicorns/leapfrogai/pkg/client/embeddings"
"github.com/defenseunicorns/leapfrogai/pkg/client/generate"
"github.com/defenseunicorns/leapfrogai/pkg/util"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
"github.com/sashabaranov/go-openai"
Expand Down Expand Up @@ -315,12 +316,12 @@ func (o *OpenAIHandler) complete(c *gin.Context) {
id, _ := uuid.NewRandom()

if input.Stream {
chanStream := make(chan *generate.CompletionResponse, 10)
client := generate.NewCompletionStreamServiceClient(conn)
stream, err := client.CompleteStream(context.Background(), &generate.CompletionRequest{
Prompt: input.Prompt.(string),
MaxTokens: int32(input.MaxTokens),
Temperature: input.Temperature,
chanStream := make(chan *completion.CompletionResponse, 10)
client := completion.NewCompletionStreamServiceClient(conn)
stream, err := client.CompleteStream(context.Background(), &completion.CompletionRequest{
Prompt: input.Prompt.(string),
MaxNewTokens: util.Int32(int32(input.MaxTokens)),
Temperature: util.Float32(input.Temperature),
})

if err != nil {
Expand Down Expand Up @@ -350,7 +351,7 @@ func (o *OpenAIHandler) complete(c *gin.Context) {
Choices: []openai.CompletionChoice{
{
Index: 0,
Text: msg.GetCompletion(),
Text: msg.GetChoices()[0].GetText(),
},
},
})
Expand All @@ -370,7 +371,7 @@ func (o *OpenAIHandler) complete(c *gin.Context) {
logit[k] = int32(v)
}

client := generate.NewCompletionServiceClient(conn)
client := completion.NewCompletionServiceClient(conn)

if input.N == 0 {
input.N = 1
Expand All @@ -384,29 +385,28 @@ func (o *OpenAIHandler) complete(c *gin.Context) {

for i := 0; i < input.N; i++ {
// Implement the completion logic here, using the data from `input`
response, err := client.Complete(c.Request.Context(), &generate.CompletionRequest{
response, err := client.Complete(c.Request.Context(), &completion.CompletionRequest{
Prompt: input.Prompt.(string),
Suffix: input.Suffix,
MaxTokens: int32(input.MaxTokens),
Temperature: input.Temperature,
TopP: input.TopP,
Stream: input.Stream,
Logprobs: int32(input.LogProbs),
Echo: input.Echo,
Stop: input.Stop, // Wrong type here...
PresencePenalty: input.PresencePenalty,
FrequencePenalty: input.FrequencyPenalty,
BestOf: int32(input.BestOf),
LogitBias: logit, // Wrong type here
Suffix: util.String(input.Suffix),
MaxNewTokens: util.Int32(int32(input.MaxTokens)),
Temperature: util.Float32(input.Temperature),
TopP: util.Float32(input.TopP),
Logprobs: util.Int32(int32(input.LogProbs)),
Echo: util.Bool(input.Echo),
Stop: input.Stop,
PresencePenalty: util.Float32(input.PresencePenalty),
FrequencePenalty: util.Float32(input.FrequencyPenalty),
BestOf: util.Int32(int32(input.BestOf)),
LogitBias: logit,
})
if err != nil {
log.Printf("500: Error completing via backend(%v): %v\n", input.Model, err)
c.JSON(500, err)
return
}
choice := openai.CompletionChoice{
Text: strings.TrimPrefix(response.GetCompletion(), input.Prompt.(string)),
FinishReason: response.GetFinishReason(),
Text: response.Choices[i].GetText(),
FinishReason: strings.ToLower(response.Choices[i].GetFinishReason().Enum().String()),
Index: i,
}
resp.Choices[i] = choice
Expand Down Expand Up @@ -436,7 +436,7 @@ func (o *OpenAIHandler) getModelClient(c *gin.Context, model string) *grpc.Clien

// EmbeddingRequest is the input to a Create embeddings request.
type EmbeddingRequest struct {
// Input is a slice of strings for which you want to generate an Embedding vector.
// Input is a slice of strings for which you want to generate an Embedding vector.
// Each input must not exceed 2048 tokens in length.
// OpenAPI suggests replacing newlines (\n) in your input with a single space, as they
// have observed inferior results when newlines are present.
Expand Down
21 changes: 0 additions & 21 deletions docs/Protobuf.md

This file was deleted.

38 changes: 38 additions & 0 deletions e2e/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import openai

# E2E smoke test: drive the LeapfrogAI gateway's OpenAI-compatible
# /completions endpoint through the official OpenAI SDK.
openai.api_base = "http://localhost:8080/openai"
openai.api_key = "removed"  # the local gateway does not validate keys; any placeholder works

# ChatML-style prompt used to exercise streaming completion end-to-end.
# Typos fixed so the model receives a clean instruction.
# NOTE(review): the assistant turn opens with "<|im_assistant|>" rather than the
# conventional "<|im_start|>assistant" -- confirm this matches the backend's
# chat template before relying on it.
prompt = """<|im_start|>system
You are an AI assistant that participates in chat discussions in an honest, concise, friendly way.<|im_end|>
<|im_start|>user
Write two sequences composed of 3 'A's and 2 'B's such that there are no two successive identical letters. Be concise.<|im_end|>
<|im_assistant|>
"""

# Alternative summarization prompt kept for manual experimentation.
# prompt = """
# <|im_start|>system
# You are an AI assistant that follows instruction extremely well. Your role is to accept some input and summarize it. For example:

# User: Summarize the main idea in the following text:
# The rapid growth of technology has led to significant advancements in various industries. From communication and transportation to healthcare and education, technology has played a crucial role in improving our lives. However, we must also be cautious of the potential negative effects, such as job loss due to automation and privacy concerns.

# Assistant: Technology's rapid growth has positively impacted various industries but also raises concerns about job loss and privacy.
# <|im_end|>
# <|im_start|>user
# Summarize the main idea in the following text:
# Few-shot prompting is a technique used to guide large language models (LLMs), like GPT-3, towards generating desired outputs by providing them with a few examples of input-output pairs. While few-shot prompting has shown promising results, there are limitations to this approach. This method allows for in-context learning by conditioning the model using examples, guiding it to produce better responses.<|im_end|><|im_assistant|>
# """

# Request a streamed completion so the response arrives token by token.
response = openai.Completion.create(
    # model="text-davinci-003",
    model="ctransformers",
    prompt=prompt,
    max_tokens=700,
    temperature=0.3,
    stream=True,
)

# Print each streamed chunk as soon as it arrives, then a trailing newline.
for event in response:
    print(event.choices[0].text, end="", flush=True)
print("\n")
10 changes: 8 additions & 2 deletions leapfrogai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,14 @@
EmbeddingsServiceServicer,
EmbeddingsServiceStub,
)
from .generate.generate_pb2 import CompletionRequest, CompletionResponse
from .generate.generate_pb2_grpc import (
from .completion.completion_pb2 import (
CompletionRequest,
CompletionResponse,
CompletionChoice,
CompletionUsage,
CompletionFinishReason,
)
from .completion.completion_pb2_grpc import (
CompletionService,
CompletionServiceServicer,
CompletionServiceStub,
Expand Down
35 changes: 18 additions & 17 deletions leapfrogai/audio/audio_pb2.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

67 changes: 37 additions & 30 deletions leapfrogai/audio/audio_pb2.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -5,74 +5,81 @@ from google.protobuf import message as _message
from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union

DESCRIPTOR: _descriptor.FileDescriptor

class AudioTask(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
__slots__ = []
TRANSCRIBE: _ClassVar[AudioTask]
TRANSLATE: _ClassVar[AudioTask]
TRANSCRIBE: AudioTask
TRANSLATE: AudioTask

class AudioMetadata(_message.Message):
__slots__ = ["format", "inputlanguage", "prompt", "temperature"]
__slots__ = ["prompt", "temperature", "inputlanguage", "format"]
class AudioFormat(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
__slots__ = []
FORMAT_FIELD_NUMBER: _ClassVar[int]
INPUTLANGUAGE_FIELD_NUMBER: _ClassVar[int]
JSON: _ClassVar[AudioMetadata.AudioFormat]
TEXT: _ClassVar[AudioMetadata.AudioFormat]
SRT: _ClassVar[AudioMetadata.AudioFormat]
VERBOSE_JSON: _ClassVar[AudioMetadata.AudioFormat]
VTT: _ClassVar[AudioMetadata.AudioFormat]
JSON: AudioMetadata.AudioFormat
PROMPT_FIELD_NUMBER: _ClassVar[int]
SRT: AudioMetadata.AudioFormat
TEMPERATURE_FIELD_NUMBER: _ClassVar[int]
TEXT: AudioMetadata.AudioFormat
SRT: AudioMetadata.AudioFormat
VERBOSE_JSON: AudioMetadata.AudioFormat
VTT: AudioMetadata.AudioFormat
format: AudioMetadata.AudioFormat
inputlanguage: str
PROMPT_FIELD_NUMBER: _ClassVar[int]
TEMPERATURE_FIELD_NUMBER: _ClassVar[int]
INPUTLANGUAGE_FIELD_NUMBER: _ClassVar[int]
FORMAT_FIELD_NUMBER: _ClassVar[int]
prompt: str
temperature: float
inputlanguage: str
format: AudioMetadata.AudioFormat
def __init__(self, prompt: _Optional[str] = ..., temperature: _Optional[float] = ..., inputlanguage: _Optional[str] = ..., format: _Optional[_Union[AudioMetadata.AudioFormat, str]] = ...) -> None: ...

class AudioRequest(_message.Message):
__slots__ = ["chunk_data", "metadata"]
CHUNK_DATA_FIELD_NUMBER: _ClassVar[int]
__slots__ = ["metadata", "chunk_data"]
METADATA_FIELD_NUMBER: _ClassVar[int]
chunk_data: bytes
CHUNK_DATA_FIELD_NUMBER: _ClassVar[int]
metadata: AudioMetadata
chunk_data: bytes
def __init__(self, metadata: _Optional[_Union[AudioMetadata, _Mapping]] = ..., chunk_data: _Optional[bytes] = ...) -> None: ...

class AudioResponse(_message.Message):
__slots__ = ["duration", "language", "segments", "task", "text"]
__slots__ = ["task", "language", "duration", "segments", "text"]
class Segment(_message.Message):
__slots__ = ["avg_logprob", "compression_ratio", "end", "id", "no_speech_prob", "seek", "start", "temperature", "text", "tokens", "transient"]
AVG_LOGPROB_FIELD_NUMBER: _ClassVar[int]
COMPRESSION_RATIO_FIELD_NUMBER: _ClassVar[int]
END_FIELD_NUMBER: _ClassVar[int]
__slots__ = ["id", "seek", "start", "end", "text", "tokens", "temperature", "avg_logprob", "compression_ratio", "no_speech_prob", "transient"]
ID_FIELD_NUMBER: _ClassVar[int]
NO_SPEECH_PROB_FIELD_NUMBER: _ClassVar[int]
SEEK_FIELD_NUMBER: _ClassVar[int]
START_FIELD_NUMBER: _ClassVar[int]
TEMPERATURE_FIELD_NUMBER: _ClassVar[int]
END_FIELD_NUMBER: _ClassVar[int]
TEXT_FIELD_NUMBER: _ClassVar[int]
TOKENS_FIELD_NUMBER: _ClassVar[int]
TEMPERATURE_FIELD_NUMBER: _ClassVar[int]
AVG_LOGPROB_FIELD_NUMBER: _ClassVar[int]
COMPRESSION_RATIO_FIELD_NUMBER: _ClassVar[int]
NO_SPEECH_PROB_FIELD_NUMBER: _ClassVar[int]
TRANSIENT_FIELD_NUMBER: _ClassVar[int]
avg_logprob: float
compression_ratio: float
end: float
id: int
no_speech_prob: float
seek: int
start: float
temperature: float
end: float
text: str
tokens: _containers.RepeatedScalarFieldContainer[int]
temperature: float
avg_logprob: float
compression_ratio: float
no_speech_prob: float
transient: bool
def __init__(self, id: _Optional[int] = ..., seek: _Optional[int] = ..., start: _Optional[float] = ..., end: _Optional[float] = ..., text: _Optional[str] = ..., tokens: _Optional[_Iterable[int]] = ..., temperature: _Optional[float] = ..., avg_logprob: _Optional[float] = ..., compression_ratio: _Optional[float] = ..., no_speech_prob: _Optional[float] = ..., transient: bool = ...) -> None: ...
DURATION_FIELD_NUMBER: _ClassVar[int]
TASK_FIELD_NUMBER: _ClassVar[int]
LANGUAGE_FIELD_NUMBER: _ClassVar[int]
DURATION_FIELD_NUMBER: _ClassVar[int]
SEGMENTS_FIELD_NUMBER: _ClassVar[int]
TASK_FIELD_NUMBER: _ClassVar[int]
TEXT_FIELD_NUMBER: _ClassVar[int]
duration: float
task: AudioTask
language: str
duration: float
segments: _containers.RepeatedCompositeFieldContainer[AudioResponse.Segment]
task: AudioTask
text: str
def __init__(self, task: _Optional[_Union[AudioTask, str]] = ..., language: _Optional[str] = ..., duration: _Optional[float] = ..., segments: _Optional[_Iterable[_Union[AudioResponse.Segment, _Mapping]]] = ..., text: _Optional[str] = ...) -> None: ...

class AudioTask(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
__slots__ = []
Loading