diff --git a/libs/community/langchain_community/document_loaders/parsers/audio.py b/libs/community/langchain_community/document_loaders/parsers/audio.py index 31d542e87b..c8fa4c3ed3 100644 --- a/libs/community/langchain_community/document_loaders/parsers/audio.py +++ b/libs/community/langchain_community/document_loaders/parsers/audio.py @@ -1,7 +1,7 @@ import logging import os import time -from typing import Dict, Iterator, Optional, Tuple +from typing import Any, Dict, Iterator, Literal, Optional, Tuple, Union from langchain_core.documents import Document @@ -31,12 +31,32 @@ class OpenAIWhisperParser(BaseBlobParser): *, chunk_duration_threshold: float = 0.1, base_url: Optional[str] = None, + language: Union[str, None] = None, + prompt: Union[str, None] = None, + response_format: Union[ + Literal["json", "text", "srt", "verbose_json", "vtt"], None + ] = None, + temperature: Union[float, None] = None, ): self.api_key = api_key self.chunk_duration_threshold = chunk_duration_threshold self.base_url = ( base_url if base_url is not None else os.environ.get("OPENAI_API_BASE") ) + self.language = language + self.prompt = prompt + self.response_format = response_format + self.temperature = temperature + + @property + def _create_params(self) -> Dict[str, Any]: + params = { + "language": self.language, + "prompt": self.prompt, + "response_format": self.response_format, + "temperature": self.temperature, + } + return {k: v for k, v in params.items() if v is not None} def lazy_parse(self, blob: Blob) -> Iterator[Document]: """Lazily parse the blob.""" @@ -95,7 +115,7 @@ class OpenAIWhisperParser(BaseBlobParser): try: if is_openai_v1(): transcript = client.audio.transcriptions.create( - model="whisper-1", file=file_obj + model="whisper-1", file=file_obj, **self._create_params ) else: transcript = openai.Audio.transcribe("whisper-1", file_obj)