import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

# Hub identifier of the ASR checkpoint to load.
_MODEL_ID = "openai/whisper-large-v3"

# Lazily-built pipeline, cached at module level so repeated calls to
# process() do not re-download / re-load the multi-GB model each time.
_pipe = None


def _get_device() -> str:
    """Return "mps" when Apple-silicon acceleration is available, else "cpu".

    The original code hard-coded "mps", which raises at runtime on any
    machine without an MPS backend.
    """
    return "mps" if torch.backends.mps.is_available() else "cpu"


def _build_pipeline():
    """Construct the Whisper speech-recognition pipeline (expensive: loads weights)."""
    device = _get_device()
    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        _MODEL_ID,
        torch_dtype=torch.float32,
        low_cpu_mem_usage=True,
        use_safetensors=True,
    )
    model.to(device)
    processor = AutoProcessor.from_pretrained(_MODEL_ID)
    return pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        torch_dtype=torch.float32,
        return_timestamps=True,
        device=device,
    )


def process(audio_path):
    """Transcribe the audio file at *audio_path* and return the transcript text.

    Parameters
    ----------
    audio_path : str
        Path to an audio file accepted by the transformers ASR pipeline.

    Returns
    -------
    str
        The recognized text (the "text" field of the pipeline output).

    The model/pipeline is built lazily on the first call and cached, so only
    the first invocation pays the weight-loading cost.
    """
    global _pipe
    if _pipe is None:
        _pipe = _build_pipeline()
    return _pipe(audio_path)["text"]