From 120c1f7abc8ac60b9c44cdca17807723e6209977 Mon Sep 17 00:00:00 2001
From: Elizabeth Cray
Date: Thu, 10 Oct 2024 10:51:48 -0400
Subject: [PATCH] Project Alpha 1

---
 .gitignore       |  1 +
 README.md        |  4 +++
 audio.py         | 12 ++++++++
 meatgrinder.py   | 33 ++++++++++++++++++++
 requirements.txt | 80 ++++++++++++++++++++++++++++++++++++++++++++++++
 summarize.py     |  6 ++++
 transcribe.py    | 21 +++++++++++++
 7 files changed, 157 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 audio.py
 create mode 100644 meatgrinder.py
 create mode 100644 requirements.txt
 create mode 100644 summarize.py
 create mode 100644 transcribe.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..bdaab25
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+env/
diff --git a/README.md b/README.md
index d70697d..0018469 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,7 @@
 # Meatgrinder
 
 Goals: to transcribe and auto-summarize meeting recordings
+
+## Installation
+
+1. install [Miniconda](https://docs.conda.io/en/latest/miniconda.html)
diff --git a/audio.py b/audio.py
new file mode 100644
index 0000000..afc7b9b
--- /dev/null
+++ b/audio.py
@@ -0,0 +1,12 @@
+import ffmpeg
+import uuid
+import pathlib
+
+def process(input_media):
+    """Convert input_media to a uniquely named .wav file inside ./env/tmp and return its path."""
+    temp_dir = pathlib.Path("./env/tmp")
+    temp_dir.mkdir(parents=True, exist_ok=True)
+    # Use a real path join: bare string concatenation would name the file "./env/tmpUUID.wav", outside temp_dir.
+    temp_name = str(temp_dir / (str(uuid.uuid4()) + ".wav"))
+    ffmpeg.run(ffmpeg.output(ffmpeg.input(input_media), temp_name))
+    return temp_name
diff --git a/meatgrinder.py b/meatgrinder.py
new file mode 100644
index 0000000..b031f42
--- /dev/null
+++ b/meatgrinder.py
@@ -0,0 +1,33 @@
+import argparse
+import os
+import audio
+import transcribe
+import summarize
+
+parser = argparse.ArgumentParser(description='Transcribe and summarize meeting recordings')
+parser.add_argument('input', type=str, help='Path to the input media')
+parser.add_argument('--output', type=str, help='Path to the output file, will not print result to stdout if provided')
+parser.add_argument('--force', action='store_true', help='Overwrite existing output file without asking')
+args = parser.parse_args()
+# argparse already rejects a missing positional; this catches an explicitly empty string.
+if not args.input:
+    print("Please provide an input file")
+    raise SystemExit(1)
+# Confirm before clobbering an existing output file unless --force was given.
+if args.output and os.path.isfile(args.output) and not args.force:
+    print("Output file already exists and will be overwritten")
+    if input("Continue? [y/N] ").lower() != "y":
+        raise SystemExit(1)
+
+# Pipeline: input media -> WAV audio -> transcript -> summary.
+audio_file = audio.process(args.input)
+
+transcription = transcribe.process(audio_file)
+
+summary = summarize.process(transcription)
+# Write the summary to --output when given; otherwise print it to stdout.
+if args.output:
+    with open(args.output, 'w', encoding='utf-8') as f:
+        f.write(summary)
+else:
+    print(summary)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..d72736f
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,80 @@
+# This file may be used to create an environment using:
+# $ conda create --name <env> --file <this file>
+# platform: osx-arm64
+accelerate=1.0.0=pypi_0
+aiohappyeyeballs=2.4.3=pypi_0
+aiohttp=3.10.9=pypi_0
+aiosignal=1.3.1=pypi_0
+argparse=1.4.0=pypi_0
+async-timeout=4.0.3=pypi_0
+attrs=24.2.0=pypi_0
+audioread=3.0.1=pypi_0
+bzip2=1.0.8=h80987f9_6
+ca-certificates=2024.9.24=hca03da5_0
+certifi=2024.8.30=pypi_0
+cffi=1.17.1=pypi_0
+charset-normalizer=3.3.2=pypi_0
+datasets=3.0.1=pypi_0
+decorator=5.1.1=pypi_0
+dill=0.3.8=pypi_0
+filelock=3.16.1=pypi_0
+frozenlist=1.4.1=pypi_0
+fsspec=2024.6.1=pypi_0
+huggingface-hub=0.25.1=pypi_0
+idna=3.10=pypi_0
+jinja2=3.1.4=pypi_0
+joblib=1.4.2=pypi_0
+lazy-loader=0.4=pypi_0
+libffi=3.4.4=hca03da5_1
+librosa=0.10.2.post1=pypi_0
+llvmlite=0.43.0=pypi_0
+markupsafe=3.0.0=pypi_0
+mpmath=1.3.0=pypi_0
+msgpack=1.1.0=pypi_0
+multidict=6.1.0=pypi_0
+multiprocess=0.70.16=pypi_0
+ncurses=6.4=h313beb8_0
+networkx=3.3=pypi_0
+numba=0.60.0=pypi_0
+numpy=2.0.2=pypi_0
+openssl=3.0.15=h80987f9_0
+packaging=24.1=pypi_0
+pandas=2.2.3=pypi_0
+pip=24.2=py310hca03da5_0
+platformdirs=4.3.6=pypi_0
+pooch=1.8.2=pypi_0
+propcache=0.2.0=pypi_0
+psutil=6.0.0=pypi_0
+pyarrow=17.0.0=pypi_0
+pycparser=2.22=pypi_0
+python=3.10.12=hb885b13_0
+python-dateutil=2.9.0.post0=pypi_0
+pytz=2024.2=pypi_0
+pyyaml=6.0.2=pypi_0
+readline=8.2=h1a28f6b_0
+regex=2024.9.11=pypi_0
+requests=2.32.3=pypi_0
+safetensors=0.4.5=pypi_0
+scikit-learn=1.5.2=pypi_0
+scipy=1.14.1=pypi_0
+sentencepiece=0.2.0=pypi_0
+setuptools=75.1.0=py310hca03da5_0
+six=1.16.0=pypi_0
+soundfile=0.12.1=pypi_0
+soxr=0.5.0.post1=pypi_0
+sqlite=3.45.3=h80987f9_0
+sympy=1.13.3=pypi_0
+threadpoolctl=3.5.0=pypi_0
+tk=8.6.14=h6ba3021_0
+tokenizers=0.20.0=pypi_0
+torch=2.4.1=pypi_0
+tqdm=4.66.5=pypi_0
+transformers=4.45.2=pypi_0
+typing-extensions=4.12.2=pypi_0
+tzdata=2024.2=pypi_0
+urllib3=2.2.3=pypi_0
+wheel=0.44.0=py310hca03da5_0
+xxhash=3.5.0=pypi_0
+xz=5.4.6=h80987f9_1
+yarl=1.14.0=pypi_0
+zlib=1.2.13=h18a0788_1
diff --git a/summarize.py b/summarize.py
new file mode 100644
index 0000000..c806873
--- /dev/null
+++ b/summarize.py
@@ -0,0 +1,6 @@
+import torch
+from transformers import pipeline
+def process(input_text):
+    """Summarize input_text (generation uses min_length=10, max_length=200); runs on MPS when available, else CPU."""
+    summarizer = pipeline("summarization", model="marianna13/flan-t5-base-summarization", max_length=200, min_length=10, device="mps" if torch.backends.mps.is_available() else "cpu")
+    return summarizer(input_text)[0]['summary_text']
diff --git a/transcribe.py b/transcribe.py
new file mode 100644
index 0000000..0b2027e
--- /dev/null
+++ b/transcribe.py
@@ -0,0 +1,21 @@
+import torch
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+def process(audio_path):
+    """Transcribe the audio file at audio_path with openai/whisper-large-v3 and return the recognized text."""
+    device = "mps" if torch.backends.mps.is_available() else "cpu"  # fall back to CPU off Apple Silicon
+    model = AutoModelForSpeechSeq2Seq.from_pretrained(
+        "openai/whisper-large-v3",
+        torch_dtype=torch.float32,
+        low_cpu_mem_usage=True,
+        use_safetensors=True,
+    ).to(device)
+    processor = AutoProcessor.from_pretrained("openai/whisper-large-v3")
+    pipe = pipeline(
+        "automatic-speech-recognition",
+        model=model,
+        tokenizer=processor.tokenizer,
+        feature_extractor=processor.feature_extractor,
+        torch_dtype=torch.float32,
+        device=device,
+    )
+    return pipe(audio_path)["text"]