Skip to content

Commit

Permalink
add AI Speech features
Browse files Browse the repository at this point in the history
  • Loading branch information
ks6088ts committed Jun 9, 2024
2 parents 1ce48e8 + 1222837 commit 3334aea
Show file tree
Hide file tree
Showing 10 changed files with 211 additions and 16 deletions.
2 changes: 2 additions & 0 deletions azure_ai_speech.env.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
AZURE_AI_SPEECH_ENDPOINT="https://<your-ai-speech-name>.cognitiveservices.azure.com"
AZURE_AI_SPEECH_API_KEY="<your-api-key>"
2 changes: 2 additions & 0 deletions backend/fastapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from fastapi.openapi.utils import get_openapi

from backend.routers import azure_ai_document_intelligence as azure_ai_document_intelligence_router
from backend.routers import azure_ai_speech as azure_ai_speech_router
from backend.routers import azure_ai_vision as azure_ai_vision_router
from backend.routers import azure_cosmos_db as azure_cosmos_db_router
from backend.routers import azure_event_grid as azure_event_grid_router
Expand All @@ -21,6 +22,7 @@
azure_storage_blob_router.router,
azure_storage_queue_router.router,
azure_cosmos_db_router.router,
azure_ai_speech_router.router,
]:
app.include_router(router)

Expand Down
69 changes: 69 additions & 0 deletions backend/internals/azure_ai_speech.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from logging import getLogger
from urllib.parse import urljoin

import requests

from backend.settings.azure_ai_speech import Settings

logger = getLogger(__name__)


class Client:
    """Minimal REST client for the Azure AI Speech batch transcription API.

    Every request is sent to ``settings.azure_ai_speech_endpoint`` and is
    authenticated with ``settings.azure_ai_speech_api_key`` through the
    ``Ocp-Apim-Subscription-Key`` header.
    """

    # Path prefix (API version included) shared by every endpoint used here.
    API_PATH = "speechtotext/v3.2-preview.2/"
    # Base model used when the caller does not pick one explicitly.
    # NOTE(review): this id was hard-coded in the original request body; it may
    # not be valid for every Speech resource/region - confirm before relying on it.
    DEFAULT_BASE_MODEL_ID = "e418c4a9-9937-4db7-b2c9-8afbff72d950"
    # Upper bound (seconds) for a single HTTP call; without it ``requests``
    # waits indefinitely for an unresponsive service.
    REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self, settings: "Settings") -> None:
        self.settings = settings

    def _url(self, path: str) -> str:
        """Return the absolute URL of ``path`` below the versioned API root."""
        return urljoin(
            self.settings.azure_ai_speech_endpoint,
            f"{self.API_PATH}{path}",
        )

    def _base_model_url(self, model_id: str) -> str:
        """Return the URL identifying the base model ``model_id``."""
        # Build the whole path in one piece: joining ".../models/base" with the
        # id via urljoin would *replace* the trailing "base" segment.
        return self._url(f"models/base/{model_id}")

    def create_transcription(
        self,
        content_url: str,
        locale: str,
        model_id: str = DEFAULT_BASE_MODEL_ID,
    ) -> str:
        """Submit a batch transcription job and return its id.

        Args:
            content_url: URL of the audio content to transcribe.
            locale: Locale of the audio, e.g. ``"ja-JP"``.
            model_id: Id of the base model to transcribe with.

        Returns:
            The transcription id, i.e. the last path segment of the ``self``
            URL in the service response.

        Raises:
            requests.HTTPError: If the service answers with an error status.
            requests.Timeout: If the service does not answer in time.
        """
        response = requests.post(
            url=self._url("transcriptions"),
            headers={
                "Ocp-Apim-Subscription-Key": self.settings.azure_ai_speech_api_key,
                "Content-Type": "application/json",
            },
            json={
                "contentUrls": [
                    content_url,
                ],
                "locale": locale,
                "displayName": "My Transcription",
                "model": {
                    "self": self._base_model_url(model_id),
                },
                "properties": {
                    "diarizationEnabled": False,
                    "displayFormWordLevelTimestampsEnabled": False,
                    "wordLevelTimestampsEnabled": False,
                    "profanityFilterMode": "Masked",
                    "punctuationMode": "DictatedAndAutomatic",
                    "timeToLive": "PT24H",  # https://learn.microsoft.com/en-us/azure/ai-services/speech-service/batch-transcription-create?pivots=rest-api
                },
            },
            timeout=self.REQUEST_TIMEOUT_SECONDS,
        )
        # Fail with a meaningful HTTP error instead of a KeyError on "self"
        # when the service rejects the request.
        response.raise_for_status()
        return response.json()["self"].rsplit("/", 1)[-1]

    def get_transcription(
        self,
        transcription_id: str,
    ) -> dict:
        """Fetch a transcription job and return the decoded JSON response.

        Args:
            transcription_id: Id returned by :meth:`create_transcription`.

        Returns:
            The parsed JSON body of the service response. The HTTP status is
            not checked, so an error payload is returned as-is.

        Raises:
            requests.Timeout: If the service does not answer in time.
        """
        response = requests.get(
            url=self._url(f"transcriptions/{transcription_id}"),
            headers={
                "Ocp-Apim-Subscription-Key": self.settings.azure_ai_speech_api_key,
            },
            timeout=self.REQUEST_TIMEOUT_SECONDS,
        )
        return response.json()
48 changes: 48 additions & 0 deletions backend/routers/azure_ai_speech.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from logging import getLogger

from fastapi import APIRouter, status
from fastapi.responses import JSONResponse

from backend.internals.azure_ai_speech import Client
from backend.schemas import azure_ai_speech as azure_ai_speech_schemas
from backend.settings.azure_ai_speech import Settings

logger = getLogger(__name__)

# Single client instance shared by the handlers in this module; the settings
# object is built once, when the module is imported.
client = Client(settings=Settings())

# Every route registered on this router is served below /azure_ai_speech.
router = APIRouter(
    responses={404: {"description": "Not found"}},
    tags=["azure_ai_speech"],
    prefix="/azure_ai_speech",
)


@router.post(
    "/transcriptions",
    response_model=azure_ai_speech_schemas.CreateTranscriptionResponse,
    status_code=status.HTTP_200_OK,
)
async def create_transcription(request: azure_ai_speech_schemas.CreateTranscriptionRequest):
    """Create a transcription job and return its id.

    Args:
        request: Body carrying the content URL and the locale of the audio.

    Returns:
        A ``CreateTranscriptionResponse`` holding the new transcription id.
    """
    # Local import keeps this handler self-contained.
    from fastapi.concurrency import run_in_threadpool

    # The client issues a synchronous HTTP request; running it in the thread
    # pool keeps the event loop free to serve other requests meanwhile.
    transcription_id = await run_in_threadpool(
        client.create_transcription,
        content_url=request.content_url,
        locale=request.locale,
    )
    return azure_ai_speech_schemas.CreateTranscriptionResponse(
        transcription_id=transcription_id,
    )


@router.get(
    "/transcriptions/{transcription_id}",
    status_code=status.HTTP_200_OK,
)
async def get_transcription(transcription_id: str):
    """Return the service's JSON description of a transcription job.

    Args:
        transcription_id: Id of the transcription to look up.

    Returns:
        A ``JSONResponse`` with status 200 whose content is whatever the
        client returned for this transcription.
    """
    # Local import keeps this handler self-contained.
    from fastapi.concurrency import run_in_threadpool

    # The client issues a synchronous HTTP request; running it in the thread
    # pool keeps the event loop free to serve other requests meanwhile.
    transcription = await run_in_threadpool(
        client.get_transcription,
        transcription_id=transcription_id,
    )
    return JSONResponse(
        status_code=status.HTTP_200_OK,
        content=transcription,
    )
14 changes: 14 additions & 0 deletions backend/schemas/azure_ai_speech.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from logging import getLogger

from pydantic import BaseModel

logger = getLogger(__name__)


class CreateTranscriptionRequest(BaseModel):
    """Request body accepted when creating a transcription.

    Both fields have defaults, so either one may be left out of the payload.
    """

    # URL of the audio content to transcribe. The default is only a
    # placeholder showing the expected Blob Storage URL shape.
    content_url: str = "https://<blob_account_name>.blob.core.windows.net/<blob_container_name>/<blob_name>"
    # Locale of the audio.
    locale: str = "ja-JP"


class CreateTranscriptionResponse(BaseModel):
    """Response body returned after a transcription has been created."""

    # Identifier to pass when fetching the transcription later on.
    transcription_id: str
11 changes: 11 additions & 0 deletions backend/settings/azure_ai_speech.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Connection settings for Azure AI Speech.

    Values are loaded from ``azure_ai_speech.env`` (UTF-8); the defaults below
    are placeholders only.
    """

    # Base URL of the Speech resource.
    azure_ai_speech_endpoint: str = "https://<name>.cognitiveservices.azure.com"
    # Subscription key sent with every request.
    azure_ai_speech_api_key: str = "<api-key>"

    model_config = SettingsConfigDict(env_file="azure_ai_speech.env", env_file_encoding="utf-8")
16 changes: 14 additions & 2 deletions docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
- [FastAPI > Extending OpenAPI](https://fastapi.tiangolo.com/how-to/extending-openapi/)
- [Get started with Azure Blob Storage and Python](https://learn.microsoft.com/en-us/azure/storage/blobs/storage-blob-python-get-started?tabs=sas-token)
- [FastAPI で実装した様々なエンドポイントのテストを書く(フォームデータの送信、クッキーの確認、ファイルのアップロード等)](https://qiita.com/kurumaebi65/items/d5cda239ef601f4c36ef#%E3%83%95%E3%82%A1%E3%82%A4%E3%83%AB%E3%82%92%E3%82%A2%E3%83%83%E3%83%97%E3%83%AD%E3%83%BC%E3%83%89%E3%83%80%E3%82%A6%E3%83%B3%E3%83%AD%E3%83%BC%E3%83%89)
- [LangChainのstreaming出力で苦労している人おる](https://qiita.com/numekudi/items/4a9e7728ac10c3515ed1)
- [LangChain の streaming 出力で苦労している人おる](https://qiita.com/numekudi/items/4a9e7728ac10c3515ed1)
- [Quickstart: Azure Queue Storage client library for Python](https://learn.microsoft.com/en-us/azure/storage/queues/storage-quickstart-queues-python?tabs=passwordless%2Croles-azure-portal%2Cenvironment-variable-windows%2Csign-in-azure-cli)
- [Azure Event Grid client library for Python - version 4.19.0](https://learn.microsoft.com/en-us/python/api/overview/azure/eventgrid-readme?view=azure-python)
- [Azure Event Grid Client Library Python Samples](https://learn.microsoft.com/en-us/samples/azure/azure-sdk-for-python/eventgrid-samples/)
Expand All @@ -39,7 +39,8 @@
- [Streamlit API cheat sheet](https://docs.streamlit.io/develop/quick-reference/cheat-sheet)
- [Streamlit > Display progress and status](https://docs.streamlit.io/develop/api-reference/status)
- [streamlit-audiorecorder](https://github.com/theevann/streamlit-audiorecorder)
- [Build a basic LLM chat app](https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps)
- [Streamlit > Build a basic LLM chat app](https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps)
- [OpenAI の新しい API を Streamlit で使ってみた](https://qiita.com/papasim824/items/5a3bee4cc3915d5ae177)
- [aiohttp > Installing all speedups in one command](https://docs.aiohttp.org/en/stable/#installing-all-speedups-in-one-command)
- [Python & aiohttp: How to upload files to a remote server](https://www.slingacademy.com/article/python-aiohttp-how-to-upload-files-to-a-remote-server/)

Expand Down Expand Up @@ -76,6 +77,17 @@

- [Azure AI Document Intelligence client library for Python](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/documentintelligence/azure-ai-documentintelligence/README.md)

### Azure AI Speech Service

- [Whisper model via Azure AI Speech or via Azure OpenAI Service?](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/whisper-overview#whisper-model-via-azure-ai-speech-or-via-azure-openai-service)
- [What is batch transcription?](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/batch-transcription)
- [Locate audio files for batch transcription > Assign resource access role](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/batch-transcription-audio-data?tabs=portal#assign-resource-access-role)
- [Use a Whisper model](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/batch-transcription-create?pivots=rest-api#use-a-whisper-model)
- [Get batch transcription results](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/batch-transcription-get?pivots=rest-api)
- [How to use the Speech Services Batch Transcription API from Python](https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/master/samples/batch/python/README.md)
- [SpeechToText API specification](https://github.com/Azure/azure-rest-api-specs/blob/main/specification/cognitiveservices/data-plane/Speech/SpeechToText/preview/v3.2-preview.2/speechtotext.json)
- [Azure-Samples/cognitive-services-speech-sdk/samples/batch/python/python-client/main.py](https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/82b4212f0905349d6607bd6f210d0e52305d5d6c/samples/batch/python/python-client/main.py)

### Azure Cosmos DB

- [Manage Azure Cosmos DB for NoSQL resources with Bicep > Azure Cosmos DB account with autoscale throughput](https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/manage-with-bicep#azure-cosmos-db-account-with-autoscale-throughput)
Expand Down
24 changes: 16 additions & 8 deletions frontend/solutions/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,19 @@ def start(
with st.chat_message("user"):
st.markdown(prompt)

with st.chat_message("assistant"):
stream = client.chat.completions.create(
model=getenv("AZURE_OPENAI_GPT_MODEL"),
messages=[{"role": m["role"], "content": m["content"]} for m in st.session_state.messages],
stream=True,
)
response = st.write_stream(stream)
st.session_state.messages.append({"role": "assistant", "content": response})
response = client.chat.completions.create(
model=getenv("AZURE_OPENAI_GPT_MODEL"),
messages=[{"role": m["role"], "content": m["content"]} for m in st.session_state.messages],
stream=True,
)
with st.chat_message("assistant", avatar="assistant"):
placeholder = st.empty()
assistant_text = ""
for chunk in response:
if len(chunk.choices) <= 0:
continue
content = chunk.choices[0].delta.content
if content:
assistant_text += content
placeholder.write(assistant_text)
st.session_state.messages.append({"role": "assistant", "content": assistant_text})
9 changes: 3 additions & 6 deletions frontend/solutions/sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ async def chat_completions_post(
response = await client.azure_openai.chat_completions.post(
ChatCompletionRequest(
content=prompt,
stream=False,
),
)
return response.content
Expand All @@ -42,14 +41,12 @@ def start(
logger.setLevel(log_level)
logger.debug(f"set log level to {log_level}")

st.write("Misc solution")

# GET
st.write("Get OpenAPI spec")
if st.button("GET"):
logger.info("Fetching data from backend...")
try:
with st.spinner("Calling API..."):
response = asyncio.run(http_get(url=urljoin(base=backend_url, url="")))
response = asyncio.run(http_get(url=urljoin(base=urljoin(backend_url, "openapi.json"), url="")))
st.write(response)
logger.info("Data fetched successfully.")
except Exception as e:
Expand All @@ -58,7 +55,7 @@ def start(

st.write("---")

# POST
st.write("Call Azure OpenAI API")
prompt = st.text_input(
label="Prompt",
value="Hello",
Expand Down
32 changes: 32 additions & 0 deletions tests/test_smoke_azure_ai_speech.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from logging import getLogger

import pytest

from tests.utilities import RUN_TEST, client

logger = getLogger(__name__)


# NOTE(review): the test is skipped when RUN_TEST is truthy - confirm against
# tests.utilities that this is the intended polarity.
@pytest.mark.skipif(RUN_TEST, reason="need to launch the backend server first")
def test_azure_ai_speech_create_transcription():
    """Smoke test: POST /azure_ai_speech/transcriptions answers with HTTP 200."""
    payload = {
        "content_url": "https://<blob_account_name>.blob.core.windows.net/<blob_container_name>/<blob_name>",
        "locale": "ja-JP",
    }
    response = client.post(url="/azure_ai_speech/transcriptions", json=payload)
    assert response.status_code == 200
    logger.info(f"response: {response.json()}")


# NOTE(review): the test is skipped when RUN_TEST is truthy - confirm against
# tests.utilities that this is the intended polarity.
@pytest.mark.skipif(RUN_TEST, reason="need to launch the backend server first")
def test_azure_ai_speech_get_transcription():
    """Smoke test: GET /azure_ai_speech/transcriptions/<id> answers with HTTP 200."""
    # Placeholder id; replace it with a real one when running against a backend.
    transcription_id = "<transcription_id>"
    endpoint = f"/azure_ai_speech/transcriptions/{transcription_id}"
    response = client.get(url=endpoint)
    assert response.status_code == 200
    logger.info(f"response: {response.json()}")

0 comments on commit 3334aea

Please sign in to comment.