Initial Complete version
commit 73c4c1171a

README.md (new file, 23 lines)
@@ -0,0 +1,23 @@
# Wazowski

Meeting transcription and summarization, running entirely on local models without sending any data to the cloud.

# Requirements

- Python 3.10 with virtualenv
- ffmpeg

# Installation

```bash
virtualenv Wazowski --python=python3.10
source Wazowski/bin/activate
pip install -r requirements.txt
python3 fetch_models.py
```

# Usage

```bash
python3 wazowski.py -i {meeting_video_recording} -o {summary_file}
```
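
The remaining flags defined in wazowski.py are optional. A fuller invocation might look like the sketch below (file names are placeholders; the built-in defaults are medium.en for Whisper, en_core_web_lg for spaCy, and a 0.0125 sentence fraction):

```bash
# -t saves the transcript, -a keeps the extracted audio,
# -m/-s pick the Whisper and spaCy models, -p sets the fraction of
# sentences to keep, and -c appends to an existing summary file
python3 wazowski.py -i meeting.mp4 -o summary.txt \
    -t transcript.txt -a meeting_audio.wav \
    -m small.en -p 0.05
```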

requirements.txt (new file, 12 lines)
@@ -0,0 +1,12 @@
numba
numpy
torch
tqdm
more-itertools
tiktoken
triton>=2.0.0,<3;platform_machine=="x86_64" and sys_platform=="linux" or sys_platform=="linux2"
bert-extractive-summarizer
whisper @ git+https://github.com/openai/whisper.git
ffmpeg-python
spacy
spacy_download

wazowski.py (new file, 102 lines)
@@ -0,0 +1,102 @@
from argparse import ArgumentParser
from uuid import uuid4
import os
import ffmpeg
import whisper
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from spacy_download import load_spacy
from string import punctuation
from heapq import nlargest

# TODO: Verbose debugging option

parser = ArgumentParser(description='Summarize a meeting')
parser.add_argument('-i', '--input', help='Input video file', required=True, dest='input')
parser.add_argument('-t', '--save-transcript', help='Output text transcription to file', dest='ftranscript')
parser.add_argument('-a', '--save-audio', help='Save audio file', dest='faudio')
parser.add_argument('-o', '--output', help='Output summary to file', required=True, dest='output')
parser.add_argument('-c', '--append', help='Append to output file', action='store_true', dest='append')
# parser.add_argument('-v', '--verbose', help='Verbose output', action='store_true', dest='verbose')
parser.add_argument('-m', '--transcription-model', help='Whisper model to use', dest='model')
parser.add_argument('-s', '--summarization-model', help='Summarization model to use', dest='summarization_model')
parser.add_argument('-p', '--summarization-percentage', help='Fraction of sentences to keep in the summary', dest='summarization_percentage', type=float, default=0.0125)
arguments = parser.parse_args()

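# Overall flow: extract the audio track with ffmpeg, transcribe it with
# Whisper, score sentences with a frequency-based spaCy summarizer, and
# write the selected sentences to the output file.
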
# Strip Audio from input
audio_file_name = str(uuid4()) + '.wav'
if arguments.faudio is not None:
    audio_file_name = arguments.faudio
if os.path.exists(audio_file_name):
    os.remove(audio_file_name)
if not os.path.isfile(arguments.input):
    print('Input must be a valid file')
    exit(1)
ffmpeg.input(arguments.input).output(audio_file_name).run()

# Transcribe Audio
transcription_model = 'medium.en'
if arguments.model is not None:
    transcription_model = arguments.model
transcription_model = whisper.load_model(transcription_model)
transcription = transcription_model.transcribe(audio_file_name)
transcription_text = ""
for sentence in transcription["segments"]:
    transcription_text += sentence["text"] + '\n'
if arguments.ftranscript is not None:
    if os.path.exists(arguments.ftranscript):
        os.remove(arguments.ftranscript)
    with open(arguments.ftranscript, 'a') as transcription_file:
        transcription_file.write(transcription_text)

# Summarize Text
summary_percentage = 0.0125
if arguments.summarization_percentage is not None:
    summary_percentage = arguments.summarization_percentage
summary_model = 'en_core_web_lg'
if arguments.summarization_model is not None:
    summary_model = arguments.summarization_model
# nlp = spacy.load(summary_model)
nlp = load_spacy(summary_model)
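# Frequency-based extractive summarization: count non-stopword,
# non-punctuation tokens (case-insensitively), normalize by the most
# frequent token, score each sentence by the sum of its token scores,
# and keep the top fraction of sentences given by summary_percentage.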
doc = nlp(transcription_text)
word_frequencies = {}
for word in doc:
    if word.text.lower() not in STOP_WORDS:
        if word.text.lower() not in punctuation:
            # Count frequencies by lowercased text so the lookups below match
            if word.text.lower() not in word_frequencies:
                word_frequencies[word.text.lower()] = 1
            else:
                word_frequencies[word.text.lower()] += 1
max_frequency = max(word_frequencies.values())
for word in word_frequencies.keys():
    word_frequencies[word] = word_frequencies[word] / max_frequency
sentence_tokens = [sent for sent in doc.sents]
sentence_scores = {}
for sent in sentence_tokens:
    for word in sent:
        if word.text.lower() in word_frequencies:
            if sent not in sentence_scores:
                sentence_scores[sent] = word_frequencies[word.text.lower()]
            else:
                sentence_scores[sent] += word_frequencies[word.text.lower()]
# Keep at least one sentence, even for very short transcripts
select_length = max(1, int(len(sentence_tokens) * summary_percentage))
# nlargest returns the highest-scoring sentences in score order
summary = nlargest(select_length, sentence_scores, key=sentence_scores.get)
final_summary = [sent.text for sent in summary]
summary = '\n'.join(final_summary)

# Save Summary
if os.path.exists(arguments.output) and not arguments.append:
    os.remove(arguments.output)
with open(arguments.output, 'a') as output_file:
    output_file.write(summary)

# Remove Unsaved Files
if arguments.faudio is None:
    os.remove(audio_file_name)
# The transcript is only written to disk when --save-transcript is set,
# so there is no temporary transcript file to clean up here.

exit(0)