Initial Complete version

This commit is contained in:
Elizabeth Cray 2024-04-16 13:07:19 -04:00
commit 73c4c1171a
3 changed files with 137 additions and 0 deletions

23
README.md Normal file
View File

@ -0,0 +1,23 @@
# Wazowski
Meeting transcription and summarization, all running in local models without sending any data to the cloud.
# Requirements
- Python 3.10 with virtualenv
- ffmpeg
# Installation
```bash
virtualenv Wazowski --python=python3.10
source Wazowski/bin/activate
pip install -r requirements.txt
python3 fetch_models.py
```
# Usage
```bash
python3 wazowski.py -i {meeting_video_recording} -o {summary_output_file}
```

12
requirements.txt Normal file
View File

@ -0,0 +1,12 @@
numba
numpy
torch
tqdm
more-itertools
tiktoken
triton>=2.0.0,<3;platform_machine=="x86_64" and sys_platform=="linux" or sys_platform=="linux2"
bert-extractive-summarizer
openai-whisper @ git+https://github.com/openai/whisper.git
ffmpeg-python
spacy
spacy_download

102
wazowski.py Normal file
View File

@ -0,0 +1,102 @@
from argparse import ArgumentParser
from uuid import uuid4
import os
import ffmpeg
import whisper
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from spacy_download import load_spacy
from string import punctuation
from heapq import nlargest
# TODO: Verbose debugging option (-v/--verbose, action='store_true', dest='verbose')
# Command-line interface. Only -i and -o are mandatory. The model options
# default to None and the script substitutes its built-in model names later.
parser = ArgumentParser(description='Summarize a meeting')
parser.add_argument('-i', '--input', help='Input video file', required=True, dest='input')
parser.add_argument('-t', '--save-transcript', help='Output text transcription to file', dest='ftranscript')
parser.add_argument('-a', '--save-audio', help='Save audio file', dest='faudio')
parser.add_argument('-o', '--output', help='Output summary to file', required=True, dest='output')
parser.add_argument('-c', '--append', help='Append to output file', action='store_true', dest='append')
parser.add_argument('-m', '--transcription-model', help='Whisper model to use', dest='model')
parser.add_argument('-s', '--summarization-model', help='Summarization model to use', dest='summarization_model')
# The value is multiplied directly with the sentence count, so it is a
# fraction (0.0125 keeps 1.25 percent of the sentences), not a number out of 100.
parser.add_argument('-p', '--summarization-percentage', help='Fraction of sentences to keep in the summary, between 0 and 1', dest='summarization_percentage', type=float, default=0.0125)
arguments = parser.parse_args()
# Strip Audio from input
# Validate the input first, so a bad input path never causes an existing file
# at the audio destination to be deleted.
if not os.path.isfile(arguments.input):
    print('Input must be a valid file')
    raise SystemExit(1)
# Without -a the audio is written to a uniquely named scratch .wav file in the
# current working directory.
if arguments.faudio is not None:
    audio_file_name = arguments.faudio
else:
    audio_file_name = str(uuid4()) + '.wav'
if os.path.exists(audio_file_name):
    # Refuse to delete the recording itself when -a points at the input file.
    if os.path.samefile(audio_file_name, arguments.input):
        print('Audio output must not be the input file')
        raise SystemExit(1)
    # Clear any stale file at the destination before ffmpeg writes to it.
    os.remove(audio_file_name)
ffmpeg.input(arguments.input).output(audio_file_name).run()
# Transcribe Audio
# 'medium.en' is the default Whisper model; -m overrides it.
transcription_model_name = 'medium.en' if arguments.model is None else arguments.model
transcription_model = whisper.load_model(transcription_model_name)
transcription = transcription_model.transcribe(audio_file_name)
# One transcribed segment per line, each terminated by a newline.
transcription_text = ''.join(
    segment["text"] + '\n' for segment in transcription["segments"]
)
if arguments.ftranscript is not None:
    # Mode 'w' replaces any previous transcript. The context manager closes the
    # file even if the write fails. UTF-8 is explicit so non-ASCII characters
    # in the transcript do not depend on the platform's default encoding.
    with open(arguments.ftranscript, 'w', encoding='utf-8') as transcription_file:
        transcription_file.write(transcription_text)
# Summarize Text
# Extractive summary: score every sentence by the normalised frequency of the
# words it contains and keep the highest-scoring fraction of sentences.
summary_percentage = arguments.summarization_percentage
if summary_percentage is None:
    summary_percentage = 0.0125
summary_model = 'en_core_web_lg'
if arguments.summarization_model is not None:
    summary_model = arguments.summarization_model
nlp = load_spacy(summary_model)
doc = nlp(transcription_text)

# Count words under their lower-cased form. The sentence-scoring loop below
# looks words up lower-cased too, so both sides must use the same key.
word_frequencies = {}
for token in doc:
    word = token.text.lower()
    if not word.strip():
        # Whitespace-only token (e.g. the newline between segments).
        continue
    if word in STOP_WORDS or not word.strip(punctuation):
        # Stop word, or a token made up of punctuation characters only.
        continue
    word_frequencies[word] = word_frequencies.get(word, 0) + 1

# Normalise counts to the range (0, 1]; skipped when nothing was counted so
# that max() is never called on an empty sequence.
if word_frequencies:
    max_frequency = max(word_frequencies.values())
    word_frequencies = {
        word: count / max_frequency for word, count in word_frequencies.items()
    }

# A sentence's score is the sum of the normalised frequencies of its words.
# Sentences without any counted word get no score and cannot be selected.
sentence_tokens = list(doc.sents)
sentence_scores = {}
for sent in sentence_tokens:
    word_scores = [
        word_frequencies[token.text.lower()]
        for token in sent
        if token.text.lower() in word_frequencies
    ]
    if word_scores:
        sentence_scores[sent] = sum(word_scores)

select_length = int(len(sentence_tokens) * summary_percentage)
if select_length < 1 and summary_percentage > 0 and sentence_scores:
    # Short transcripts would otherwise round down to an empty summary.
    select_length = 1
top_sentences = nlargest(select_length, sentence_scores, key=sentence_scores.get)
# Sentences are emitted in descending score order. Strip stray whitespace and
# separate them with a space so they do not run together.
summary = ' '.join(sent.text.strip() for sent in top_sentences)
# Save Summary
# With -c the summary is appended to the output file; otherwise any previous
# content is replaced.
output_mode = 'a' if arguments.append else 'w'
# The context manager closes the file even if the write fails.
with open(arguments.output, output_mode, encoding='utf-8') as output_file:
    output_file.write(summary)
    # Terminate the summary so a later appended run starts on its own line.
    output_file.write('\n')
# Remove Unsaved Files
# The extracted audio is a scratch file unless the user asked to keep it with
# -a. A transcript file is only ever written when -t is given, so there is
# nothing to delete for it here.
if arguments.faudio is None:
    os.remove(audio_file_name)
raise SystemExit(0)