generated from ks6088ts/template-python
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #87 from ks6088ts-labs/feature/issue-86_add-ai-speech-feature
add AI Speech transcription features
- Loading branch information
Showing
8 changed files
with
189 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
AZURE_AI_SPEECH_ENDPOINT="https://<your-ai-speech-name>.cognitiveservices.azure.com" | ||
AZURE_AI_SPEECH_API_KEY="<your-api-key>" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
from logging import getLogger | ||
from urllib.parse import urljoin | ||
|
||
import requests | ||
|
||
from backend.settings.azure_ai_speech import Settings | ||
|
||
logger = getLogger(__name__) | ||
|
||
|
||
class Client:
    """Minimal client for the Azure AI Speech batch transcription REST API.

    Every request goes to the endpoint stored in ``settings`` and is
    authenticated with the subscription key stored there.
    """

    # Versioned REST prefix shared by every call. It is appended to the
    # endpoint with ``urljoin``, so it must stay relative and end with "/".
    API_PATH = "speechtotext/v3.2-preview.2/"
    # FIXME: the default base model id is still hard-coded; callers can
    # override it through ``create_transcription(base_model_id=...)``.
    DEFAULT_BASE_MODEL_ID = "e418c4a9-9937-4db7-b2c9-8afbff72d950"
    # ``requests`` waits indefinitely unless a timeout is given.
    REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self, settings: "Settings") -> None:
        """Store the settings providing the endpoint and the API key."""
        self.settings = settings

    def _build_url(self, *segments: str) -> str:
        """Return the absolute URL of the API resource made of ``segments``."""
        # The segments are joined *before* calling urljoin: urljoin replaces
        # the last path component of a base that has no trailing slash, which
        # would silently drop a segment such as "base" in "models/base/<id>".
        return urljoin(
            self.settings.azure_ai_speech_endpoint,
            self.API_PATH + "/".join(segments),
        )

    def _auth_headers(self) -> dict:
        """Return the subscription-key header sent with every request."""
        return {
            "Ocp-Apim-Subscription-Key": self.settings.azure_ai_speech_api_key,
        }

    def create_transcription(
        self,
        content_url: str,
        locale: str,
        *,
        display_name: str = "My Transcription",
        base_model_id: str = DEFAULT_BASE_MODEL_ID,
    ) -> str:
        """Submit a batch transcription job and return its id.

        Args:
            content_url: URL of the audio content to transcribe.
            locale: Locale of the audio, e.g. ``"ja-JP"``.
            display_name: Display name given to the transcription job.
            base_model_id: Id of the base model used for the transcription.

        Returns:
            The last path segment of the ``self`` URL in the service response.

        Raises:
            requests.HTTPError: If the service answers with an error status.
            requests.Timeout: If the service does not answer in time.
        """
        response = requests.post(
            url=self._build_url("transcriptions"),
            headers={
                **self._auth_headers(),
                "Content-Type": "application/json",
            },
            json={
                "contentUrls": [
                    content_url,
                ],
                "locale": locale,
                "displayName": display_name,
                "model": {
                    "self": self._build_url("models", "base", base_model_id),
                },
                "properties": {
                    "diarizationEnabled": False,
                    "displayFormWordLevelTimestampsEnabled": False,
                    "wordLevelTimestampsEnabled": False,
                    "profanityFilterMode": "Masked",
                    "punctuationMode": "DictatedAndAutomatic",
                    "timeToLive": "PT24H",  # https://learn.microsoft.com/en-us/azure/ai-services/speech-service/batch-transcription-create?pivots=rest-api
                },
            },
            timeout=self.REQUEST_TIMEOUT_SECONDS,
        )
        # Fail with the HTTP error instead of a KeyError on the missing
        # "self" field of an error payload.
        response.raise_for_status()
        return response.json()["self"].rsplit("/", 1)[-1]

    def get_transcription(
        self,
        transcription_id: str,
    ) -> dict:
        """Fetch a transcription job and return the decoded JSON response.

        The HTTP status is deliberately not checked: whatever JSON body the
        service returns is handed back to the caller unchanged.

        Args:
            transcription_id: Id returned by ``create_transcription``.

        Raises:
            requests.Timeout: If the service does not answer in time.
        """
        response = requests.get(
            url=self._build_url("transcriptions", transcription_id),
            headers=self._auth_headers(),
            timeout=self.REQUEST_TIMEOUT_SECONDS,
        )
        return response.json()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
from logging import getLogger | ||
|
||
from fastapi import APIRouter, status | ||
from fastapi.responses import JSONResponse | ||
|
||
from backend.internals.azure_ai_speech import Client | ||
from backend.schemas import azure_ai_speech as azure_ai_speech_schemas | ||
from backend.settings.azure_ai_speech import Settings | ||
|
||
logger = getLogger(__name__) | ||
|
||
# One speech client shared by every route in this module; building it here
# means Settings() is evaluated when the module is imported.
client = Client(settings=Settings())

# All routes below are served under /azure_ai_speech and share one tag.
router = APIRouter(
    tags=["azure_ai_speech"],
    prefix="/azure_ai_speech",
    responses={404: {"description": "Not found"}},
)
|
||
|
||
@router.post(
    "/transcriptions",
    response_model=azure_ai_speech_schemas.CreateTranscriptionResponse,
    status_code=status.HTTP_200_OK,
)
async def create_transcription(request: azure_ai_speech_schemas.CreateTranscriptionRequest):
    """Submit a transcription job for the given content URL and return its id."""
    # Imported locally so this handler does not depend on a file-level import.
    from fastapi.concurrency import run_in_threadpool

    # The client performs blocking HTTP calls; running it in the thread pool
    # keeps the event loop free to serve other requests meanwhile.
    transcription_id = await run_in_threadpool(
        client.create_transcription,
        content_url=request.content_url,
        locale=request.locale,
    )
    return azure_ai_speech_schemas.CreateTranscriptionResponse(
        transcription_id=transcription_id,
    )
|
||
|
||
@router.get(
    "/transcriptions/{transcription_id}",
    status_code=status.HTTP_200_OK,
)
async def get_transcription(transcription_id: str):
    """Return the service's JSON description of a transcription job."""
    # Imported locally so this handler does not depend on a file-level import.
    from fastapi.concurrency import run_in_threadpool

    # The client performs a blocking HTTP call; running it in the thread pool
    # keeps the event loop free to serve other requests meanwhile.
    transcription = await run_in_threadpool(
        client.get_transcription,
        transcription_id=transcription_id,
    )
    # The upstream body is forwarded as-is, always with a 200 status.
    return JSONResponse(
        status_code=status.HTTP_200_OK,
        content=transcription,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
from logging import getLogger | ||
|
||
from pydantic import BaseModel | ||
|
||
logger = getLogger(__name__) | ||
|
||
|
||
class CreateTranscriptionRequest(BaseModel):
    # Request body for creating a transcription job.
    # Both fields have defaults, so either may be omitted by the caller.

    # URL of the content to transcribe; the default is a placeholder, not a real blob.
    content_url: str = "https://<blob_account_name>.blob.core.windows.net/<blob_container_name>/<blob_name>"
    # Locale of the content; defaults to "ja-JP".
    locale: str = "ja-JP"
|
||
|
||
class CreateTranscriptionResponse(BaseModel):
    # Response body returned after a transcription job has been created.

    # Identifier of the created transcription job (required).
    transcription_id: str
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
from pydantic_settings import BaseSettings, SettingsConfigDict | ||
|
||
|
||
class Settings(BaseSettings):
    # Connection settings for Azure AI Speech.
    # Values are read from the UTF-8 encoded "azure_ai_speech.env" file; the
    # defaults below are placeholders only.
    model_config = SettingsConfigDict(env_file="azure_ai_speech.env", env_file_encoding="utf-8")

    # Base URL of the Speech resource.
    azure_ai_speech_endpoint: str = "https://<name>.cognitiveservices.azure.com"
    # Subscription key for the Speech resource.
    azure_ai_speech_api_key: str = "<api-key>"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
from logging import getLogger | ||
|
||
import pytest | ||
|
||
from tests.utilities import RUN_TEST, client | ||
|
||
logger = getLogger(__name__) | ||
|
||
|
||
# NOTE(review): the test is skipped when RUN_TEST is truthy; confirm the
# flag's polarity in tests.utilities.
@pytest.mark.skipif(RUN_TEST, reason="need to launch the backend server first")
def test_azure_ai_speech_create_transcription():
    """POST a transcription request and expect a 200 response."""
    endpoint = "/azure_ai_speech/{0}".format("transcriptions")
    payload = {
        "content_url": "https://<blob_account_name>.blob.core.windows.net/<blob_container_name>/<blob_name>",
        "locale": "ja-JP",
    }
    response = client.post(url=endpoint, json=payload)
    assert response.status_code == 200
    logger.info(f"response: {response.json()}")
|
||
|
||
# NOTE(review): the test is skipped when RUN_TEST is truthy; confirm the
# flag's polarity in tests.utilities.
@pytest.mark.skipif(RUN_TEST, reason="need to launch the backend server first")
def test_azure_ai_speech_get_transcription():
    """GET a transcription by id and expect a 200 response."""
    # Placeholder id; replace it with a real one when running against a server.
    transcription_id = "<transcription_id>"
    resource = f"transcriptions/{transcription_id}"
    endpoint = "/azure_ai_speech/{0}".format(resource)
    response = client.get(url=endpoint)
    assert response.status_code == 200
    logger.info(f"response: {response.json()}")