Meatgrinder/transcribe.py

23 lines
707 B
Python
Raw Permalink Normal View History

2024-10-10 14:51:48 +00:00
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
2024-10-16 13:52:53 +00:00
def process(audio_path, model_name, *, device=None):
    """Transcribe an audio input with a Hugging Face speech-to-text model.

    The model and its processor are loaded from ``model_name`` on every
    call, wrapped in an ``automatic-speech-recognition`` pipeline, and run
    once on ``audio_path``.

    Args:
        audio_path: Input handed unchanged to the ASR pipeline.
        model_name: Identifier passed to ``from_pretrained`` for both the
            model and the processor.
        device: Torch device string to run on. When ``None`` (the default)
            ``"mps"`` is used if available, otherwise ``"cuda"``, otherwise
            ``"cpu"``.

    Returns:
        The ``"text"`` entry of the pipeline result.
    """
    if device is None:
        device = _default_device()

    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        model_name,
        torch_dtype=torch.float32,
        low_cpu_mem_usage=True,
        use_safetensors=True,
    )
    model.to(device)

    processor = AutoProcessor.from_pretrained(model_name)
    pipe = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        torch_dtype=torch.float32,
        # Timestamps are requested even though only the text is returned.
        # NOTE(review): presumably required for long-form audio -- confirm
        # before removing.
        return_timestamps=True,
        device=device,
    )
    return pipe(audio_path)["text"]


def _default_device():
    """Return the device string used when the caller does not name one."""
    # Prefer MPS first so machines that worked with the previous hard-coded
    # "mps" keep running on the same device.
    if torch.backends.mps.is_available():
        return "mps"
    if torch.cuda.is_available():
        return "cuda"
    return "cpu"