"""Wazowski: transcribe a meeting recording and produce an extractive summary.

Pipeline: extract the audio track with ffmpeg -> transcribe it locally with
Whisper -> build a frequency-based extractive summary with spaCy. No data
leaves the machine.

Usage: python3 wazowski.py -i meeting.mp4 -o summary.txt [options]
"""
from argparse import ArgumentParser
from heapq import nlargest
from string import punctuation
from uuid import uuid4
import os

import ffmpeg
import whisper
from spacy.lang.en.stop_words import STOP_WORDS
from spacy_download import load_spacy

# TODO: Verbose debugging option


def _parse_args():
    """Build and parse the command-line interface.

    Defaults for the model names and the summarization fraction live here
    instead of being patched in after parsing.
    """
    parser = ArgumentParser(description='Summarize a meeting')
    parser.add_argument('-i', '--input', help='Input video file',
                        required=True, dest='input')
    parser.add_argument('-t', '--save-transcript',
                        help='Output text transcription to file',
                        dest='ftranscript')
    parser.add_argument('-a', '--save-audio', help='Save audio file',
                        dest='faudio')
    parser.add_argument('-o', '--output', help='Output summary to file',
                        required=True, dest='output')
    parser.add_argument('-c', '--append', help='Append to output file',
                        action='store_true', dest='append')
    # parser.add_argument('-v', '--verbose', help='Verbose output',
    #                     action='store_true', dest='verbose')
    parser.add_argument('-m', '--transcription-model',
                        help='Whisper model to use',
                        dest='model', default='medium.en')
    parser.add_argument('-s', '--summarization-model',
                        help='Summarization (spaCy) model to use',
                        dest='summarization_model', default='en_core_web_lg')
    parser.add_argument('-p', '--summarization-percentage',
                        help='Fraction of sentences to keep in the summary',
                        dest='summarization_percentage', type=float,
                        default=0.0125)
    return parser.parse_args()


def _extract_audio(video_path, audio_path):
    """Strip the audio track from *video_path* into *audio_path* (wav).

    Any stale file at *audio_path* is removed first so ffmpeg does not
    prompt or fail on an existing output.
    """
    if os.path.exists(audio_path):
        os.remove(audio_path)
    ffmpeg.input(video_path).output(audio_path).run()


def _transcribe(audio_path, model_name):
    """Transcribe *audio_path* with the named Whisper model.

    Returns the transcript as one string, one segment per line.
    """
    model = whisper.load_model(model_name)
    result = model.transcribe(audio_path)
    return ''.join(segment['text'] + '\n' for segment in result['segments'])


def _summarize(text, model_name, fraction):
    """Extractive summary: keep the top *fraction* of sentences of *text*.

    Scores each sentence by the normalized frequencies of its non-stopword,
    non-punctuation tokens. Returns the selected sentences concatenated
    (highest-scoring first), or '' when the text yields no scorable tokens.
    """
    nlp = load_spacy(model_name)  # downloads the model on first use
    doc = nlp(text)

    # Count token frequencies. NOTE: keys are lowercased consistently —
    # the original code counted by word.text but looked up by
    # word.text.lower(), so capitalized words were never scored.
    frequencies = {}
    for token in doc:
        lowered = token.text.lower()
        if lowered not in STOP_WORDS and lowered not in punctuation:
            frequencies[lowered] = frequencies.get(lowered, 0) + 1
    if not frequencies:
        # Empty/stopword-only transcript: max() below would raise.
        return ''

    # Normalize counts to [0, 1] so sentence scores are model-independent.
    peak = max(frequencies.values())
    for word in frequencies:
        frequencies[word] = frequencies[word] / peak

    sentences = list(doc.sents)
    scores = {}
    for sentence in sentences:
        for token in sentence:
            weight = frequencies.get(token.text.lower())
            if weight is not None:
                scores[sentence] = scores.get(sentence, 0.0) + weight

    keep = int(len(sentences) * fraction)
    best = nlargest(keep, scores, key=scores.get)
    return ''.join(span.text for span in best)


def main():
    """CLI entry point: extract audio, transcribe, summarize, write output."""
    args = _parse_args()
    if not os.path.isfile(args.input):
        print('Input must be a valid file')
        exit(1)

    # Use the requested audio path, or a throwaway uuid-named temp file.
    audio_file = args.faudio if args.faudio is not None else str(uuid4()) + '.wav'
    _extract_audio(args.input, audio_file)

    transcript = _transcribe(audio_file, args.model)
    if args.ftranscript is not None:
        # 'w' replaces the remove-then-append dance of the original.
        with open(args.ftranscript, 'w') as handle:
            handle.write(transcript)

    summary = _summarize(transcript, args.summarization_model,
                         args.summarization_percentage)

    with open(args.output, 'a' if args.append else 'w') as handle:
        handle.write(summary)

    # Remove the temp audio file unless the caller asked to keep it.
    # (The original also tried os.remove(args.ftranscript) when ftranscript
    # was None — a guaranteed TypeError; no transcript temp file exists.)
    if args.faudio is None:
        os.remove(audio_file)
    exit(0)


if __name__ == '__main__':
    main()