import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
# Cache of constructed ASR pipelines keyed by (model_id, device), so the
# multi-gigabyte Whisper checkpoint is downloaded/loaded only once per
# configuration instead of on every call to process().
_PIPELINE_CACHE = {}


def process(audio_path, model_id="openai/whisper-large-v3", device="mps"):
    """Transcribe an audio file with a Whisper speech-recognition pipeline.

    Parameters
    ----------
    audio_path : str
        Path to the audio file to transcribe (any input the pipeline's
        feature extractor accepts).
    model_id : str, optional
        Hugging Face model identifier. Defaults to ``openai/whisper-large-v3``
        (the original hard-coded value).
    device : str, optional
        Torch device string. Defaults to ``"mps"`` (Apple Silicon, the
        original hard-coded value); pass ``"cpu"`` or ``"cuda"`` elsewhere.

    Returns
    -------
    str
        The transcribed text produced by the pipeline.
    """
    key = (model_id, device)
    if key not in _PIPELINE_CACHE:
        model = AutoModelForSpeechSeq2Seq.from_pretrained(
            model_id,
            torch_dtype=torch.float32,
            low_cpu_mem_usage=True,
            use_safetensors=True,
        )
        model.to(device)
        processor = AutoProcessor.from_pretrained(model_id)
        _PIPELINE_CACHE[key] = pipeline(
            "automatic-speech-recognition",
            model=model,
            tokenizer=processor.tokenizer,
            feature_extractor=processor.feature_extractor,
            torch_dtype=torch.float32,
            device=device,
        )
    return _PIPELINE_CACHE[key](audio_path)["text"]