Project Alpha 1
This commit is contained in:
parent
c03af29cc6
commit
120c1f7abc
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
env/
|
@ -1,3 +1,7 @@
|
|||||||
# Meatgrinder
|
# Meatgrinder
|
||||||
|
|
||||||
Goals: to transcribe and auto-summarize meeting recordings
|
Goals: to transcribe and auto-summarize meeting recordings
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
1. Install [Miniconda](https://docs.conda.io/en/latest/miniconda.html)
|
||||||
|
12
audio.py
Normal file
12
audio.py
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
import ffmpeg
|
||||||
|
import uuid
|
||||||
|
import pathlib
|
||||||
|
|
||||||
|
def process(input_media):
    """Extract/convert the audio track of *input_media* to a WAV file.

    Runs ffmpeg to transcode the input into a uniquely-named .wav file
    under ./env/tmp (created on demand).

    Args:
        input_media: Path to the input media file (anything ffmpeg accepts).

    Returns:
        str: Path of the generated temporary WAV file. The caller is
        responsible for deleting it.
    """
    temp_dir = "./env/tmp"
    pathlib.Path(temp_dir).mkdir(parents=True, exist_ok=True)
    # BUG FIX: the original concatenated temp_dir + uuid with no path
    # separator, writing "./env/tmpXXXX.wav" OUTSIDE the directory that
    # was just created. Joining with pathlib places the file inside it.
    temp_name = str(pathlib.Path(temp_dir) / f"{uuid.uuid4()}.wav")
    stream = ffmpeg.input(input_media)
    stream = ffmpeg.output(stream, temp_name)
    ffmpeg.run(stream)
    return temp_name
|
33
meatgrinder.py
Normal file
33
meatgrinder.py
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
# Meatgrinder CLI entry point: transcribe a meeting recording and print
# (or write) an auto-generated summary.
import argparse
import os

import audio
import summarize
import transcribe

parser = argparse.ArgumentParser(description='Transcribe and summarize meeting recordings')
parser.add_argument('input', type=str, help='Path to the input media')
parser.add_argument('--output', type=str, help='Path to the output file, will not print result to stdout if provided')
parser.add_argument('--force', action='store_true', help='Overwrite existing output file without asking')
args = parser.parse_args()

# Guard clause: argparse already requires the positional arg, but keep the
# explicit check for an empty string value.
if not args.input:
    print("Please provide an input file")
    exit(1)

# BUG FIX: the original called os.path.isFile, which does not exist and
# raises AttributeError at runtime; the correct name is os.path.isfile.
if args.output and os.path.isfile(args.output) and not args.force:
    print("Output file already exists and will be overwritten")
    if input("Continue? [y/N] ").lower() != "y":
        exit(1)

# Pipeline: media -> temporary WAV -> transcript text -> summary text.
audio_file = audio.process(args.input)

transcription = transcribe.process(audio_file)

summary = summarize.process(transcription)

if args.output:
    with open(args.output, 'w') as f:
        f.write(summary)
else:
    print(summary)
|
80
requirements.txt
Normal file
80
requirements.txt
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
# This file may be used to create an environment using:
|
||||||
|
# $ conda create --name <env> --file <this file>
|
||||||
|
# platform: osx-arm64
|
||||||
|
accelerate=1.0.0=pypi_0
|
||||||
|
aiohappyeyeballs=2.4.3=pypi_0
|
||||||
|
aiohttp=3.10.9=pypi_0
|
||||||
|
aiosignal=1.3.1=pypi_0
|
||||||
|
argparse=1.4.0=pypi_0
|
||||||
|
async-timeout=4.0.3=pypi_0
|
||||||
|
attrs=24.2.0=pypi_0
|
||||||
|
audioread=3.0.1=pypi_0
|
||||||
|
bzip2=1.0.8=h80987f9_6
|
||||||
|
ca-certificates=2024.9.24=hca03da5_0
|
||||||
|
certifi=2024.8.30=pypi_0
|
||||||
|
cffi=1.17.1=pypi_0
|
||||||
|
charset-normalizer=3.3.2=pypi_0
|
||||||
|
datasets=3.0.1=pypi_0
|
||||||
|
decorator=5.1.1=pypi_0
|
||||||
|
dill=0.3.8=pypi_0
|
||||||
|
filelock=3.16.1=pypi_0
|
||||||
|
frozenlist=1.4.1=pypi_0
|
||||||
|
fsspec=2024.6.1=pypi_0
|
||||||
|
huggingface-hub=0.25.1=pypi_0
|
||||||
|
idna=3.10=pypi_0
|
||||||
|
jinja2=3.1.4=pypi_0
|
||||||
|
joblib=1.4.2=pypi_0
|
||||||
|
lazy-loader=0.4=pypi_0
|
||||||
|
libffi=3.4.4=hca03da5_1
|
||||||
|
librosa=0.10.2.post1=pypi_0
|
||||||
|
llvmlite=0.43.0=pypi_0
|
||||||
|
markupsafe=3.0.0=pypi_0
|
||||||
|
mpmath=1.3.0=pypi_0
|
||||||
|
msgpack=1.1.0=pypi_0
|
||||||
|
multidict=6.1.0=pypi_0
|
||||||
|
multiprocess=0.70.16=pypi_0
|
||||||
|
ncurses=6.4=h313beb8_0
|
||||||
|
networkx=3.3=pypi_0
|
||||||
|
numba=0.60.0=pypi_0
|
||||||
|
numpy=2.0.2=pypi_0
|
||||||
|
openssl=3.0.15=h80987f9_0
|
||||||
|
packaging=24.1=pypi_0
|
||||||
|
pandas=2.2.3=pypi_0
|
||||||
|
pip=24.2=py310hca03da5_0
|
||||||
|
platformdirs=4.3.6=pypi_0
|
||||||
|
pooch=1.8.2=pypi_0
|
||||||
|
propcache=0.2.0=pypi_0
|
||||||
|
psutil=6.0.0=pypi_0
|
||||||
|
pyarrow=17.0.0=pypi_0
|
||||||
|
pycparser=2.22=pypi_0
|
||||||
|
python=3.10.12=hb885b13_0
|
||||||
|
python-dateutil=2.9.0.post0=pypi_0
|
||||||
|
pytz=2024.2=pypi_0
|
||||||
|
pyyaml=6.0.2=pypi_0
|
||||||
|
readline=8.2=h1a28f6b_0
|
||||||
|
regex=2024.9.11=pypi_0
|
||||||
|
requests=2.32.3=pypi_0
|
||||||
|
safetensors=0.4.5=pypi_0
|
||||||
|
scikit-learn=1.5.2=pypi_0
|
||||||
|
scipy=1.14.1=pypi_0
|
||||||
|
sentencepiece=0.2.0=pypi_0
|
||||||
|
setuptools=75.1.0=py310hca03da5_0
|
||||||
|
six=1.16.0=pypi_0
|
||||||
|
soundfile=0.12.1=pypi_0
|
||||||
|
soxr=0.5.0.post1=pypi_0
|
||||||
|
sqlite=3.45.3=h80987f9_0
|
||||||
|
sympy=1.13.3=pypi_0
|
||||||
|
threadpoolctl=3.5.0=pypi_0
|
||||||
|
tk=8.6.14=h6ba3021_0
|
||||||
|
tokenizers=0.20.0=pypi_0
|
||||||
|
torch=2.4.1=pypi_0
|
||||||
|
tqdm=4.66.5=pypi_0
|
||||||
|
transformers=4.45.2=pypi_0
|
||||||
|
typing-extensions=4.12.2=pypi_0
|
||||||
|
tzdata=2024.2=pypi_0
|
||||||
|
urllib3=2.2.3=pypi_0
|
||||||
|
wheel=0.44.0=py310hca03da5_0
|
||||||
|
xxhash=3.5.0=pypi_0
|
||||||
|
xz=5.4.6=h80987f9_1
|
||||||
|
yarl=1.14.0=pypi_0
|
||||||
|
zlib=1.2.13=h18a0788_1
|
6
summarize.py
Normal file
6
summarize.py
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
from transformers import pipeline
|
||||||
|
|
||||||
|
def process(input_text):
    """Summarize *input_text* with a FLAN-T5 summarization model.

    Args:
        input_text: The text to summarize (e.g. a meeting transcript).

    Returns:
        str: The generated summary (10-200 tokens).
    """
    # PERF FIX: the original rebuilt the pipeline (a full model load) on
    # every call. Cache it on the function object so the model is loaded
    # exactly once per process.
    if not hasattr(process, "_summarizer"):
        process._summarizer = pipeline("summarization", model="marianna13/flan-t5-base-summarization", max_length=200, min_length=10, device="mps")
    output = process._summarizer(input_text)[0]['summary_text']
    return output
|
21
transcribe.py
Normal file
21
transcribe.py
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
import torch
|
||||||
|
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
|
||||||
|
def process(audio_path):
    """Transcribe an audio file to text with Whisper large-v3.

    Args:
        audio_path: Path to a WAV (or other supported) audio file.

    Returns:
        str: The transcribed text.
    """
    # PERF FIX: the original reloaded the multi-GB Whisper model, the
    # processor, and the pipeline on EVERY call. Build them once and cache
    # the pipeline on the function object.
    if not hasattr(process, "_pipe"):
        model = AutoModelForSpeechSeq2Seq.from_pretrained(
            "openai/whisper-large-v3",
            torch_dtype=torch.float32,
            low_cpu_mem_usage=True,
            use_safetensors=True
        )
        # NOTE(review): hard-coded to Apple-silicon "mps"; presumably this
        # project targets macOS only — confirm before running elsewhere.
        model.to("mps")
        processor = AutoProcessor.from_pretrained("openai/whisper-large-v3")
        process._pipe = pipeline(
            "automatic-speech-recognition",
            model=model,
            tokenizer=processor.tokenizer,
            feature_extractor=processor.feature_extractor,
            torch_dtype=torch.float32,
            device="mps"
        )
    output = process._pipe(audio_path)["text"]
    return output
|
Loading…
Reference in New Issue
Block a user