Wazowski/wazowski.py
2024-04-16 13:33:27 -04:00

101 lines
4.0 KiB
Python

from argparse import ArgumentParser
from uuid import uuid4
import os
import ffmpeg
import whisper
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from spacy_download import load_spacy
from string import punctuation
from heapq import nlargest
# TODO: Verbose debugging option
# Command-line interface. Only the input video (-i) and the summary output
# file (-o) are required; every other option is optional and the later stages
# of the script fall back to their own defaults when an option is omitted.
parser = ArgumentParser(description='Summarize a meeting')
parser.add_argument('-i', '--input', help='Input video file', required=True, dest='input')
parser.add_argument('-t', '--save-transcript', help='Output text transcription to file', dest='ftranscript')
parser.add_argument('-a', '--save-audio', help='Save audio file', dest='faudio')
parser.add_argument('-o', '--output', help='Output summary to file', required=True, dest='output')
parser.add_argument('-c', '--append', help='Append to output file', action='store_true', dest='append')
# parser.add_argument('-v', '--verbose', help='Verbose output', action='store_true', dest='verbose')
parser.add_argument('-m', '--transcription-model', help='Whisper model to use (default: medium.en)', dest='model')
parser.add_argument('-s', '--summarization-model', help='Summarization model to use (default: en_core_web_lg)', dest='summarization_model')
# The value is multiplied by the sentence count, so it is a fraction in
# [0, 1] rather than a 0-100 percentage.
parser.add_argument(
    '-p', '--summarization-percentage',
    help='Fraction of sentences to keep in the summary, between 0 and 1 (default: 0.0125)',
    dest='summarization_percentage', type=float, default=0.0125,
)
arguments = parser.parse_args()
# Strip Audio from input
# Validate the input before touching the filesystem, so that a bad input path
# can never cost the user a previously saved audio file. SystemExit with a
# message prints it to stderr and exits with status 1.
if not os.path.isfile(arguments.input):
    raise SystemExit('Input must be a valid file')
# Without --save-audio the audio goes to a uniquely named temporary WAV file,
# which the end of the script deletes again.
if arguments.faudio is not None:
    audio_file_name = arguments.faudio
else:
    audio_file_name = str(uuid4()) + '.wav'
if os.path.exists(audio_file_name):
    # Refuse to delete the input video when the same path was given for both.
    if os.path.samefile(audio_file_name, arguments.input):
        raise SystemExit('Audio output must not be the input file')
    # ffmpeg is run without an overwrite flag, so clear any stale file first.
    os.remove(audio_file_name)
ffmpeg.input(arguments.input).output(audio_file_name).run()
# Transcribe Audio
# Whisper model name; 'medium.en' is used when -m is not given.
model_name = arguments.model if arguments.model is not None else 'medium.en'
transcription_model = whisper.load_model(model_name)
transcription = transcription_model.transcribe(audio_file_name)
# One transcript segment per line, each terminated by a newline.
transcription_text = ''.join(
    segment["text"] + '\n' for segment in transcription["segments"]
)
if arguments.ftranscript is not None:
    # Mode 'w' replaces any existing transcript, and the with-block closes the
    # file even if the write fails. UTF-8 is explicit so that non-ASCII
    # characters in the transcript do not depend on the platform default.
    with open(arguments.ftranscript, 'w', encoding='utf-8') as transcription_file:
        transcription_file.write(transcription_text)
# Summarize Text
# Extractive summary: every sentence is scored by the summed, normalized
# frequency of the words it contains, and the highest-scoring fraction of
# sentences (summary_percentage) becomes the summary.
summary_percentage = arguments.summarization_percentage
if summary_percentage is None:
    summary_percentage = 0.0125
summary_model = arguments.summarization_model
if summary_model is None:
    summary_model = 'en_core_web_lg'
# load_spacy comes from spacy_download and is used in place of spacy.load
# (presumably so a missing model is fetched on demand -- confirm).
nlp = load_spacy(summary_model)
doc = nlp(transcription_text)

# Count how often each word occurs. Keys are lower-cased because the scoring
# loop below looks words up lower-cased; keying by the original casing meant
# capitalized words were counted but never scored.
word_frequencies = {}
for token in doc:
    word = token.text.lower()
    # Skip whitespace tokens (the transcript has a newline after every
    # segment), stop words and punctuation. STOP_WORDS is tested directly
    # rather than being copied into a list for every token.
    if not word.strip() or word in STOP_WORDS or word in punctuation:
        continue
    word_frequencies[word] = word_frequencies.get(word, 0) + 1

# Normalize to [0, 1] by the most frequent word. The guard keeps an empty
# transcript from crashing in max().
if word_frequencies:
    max_frequency = max(word_frequencies.values())
    word_frequencies = {
        word: count / max_frequency for word, count in word_frequencies.items()
    }

# Score each sentence as the sum of the weights of the counted words in it.
sentence_tokens = list(doc.sents)
sentence_scores = {}
for sent in sentence_tokens:
    for token in sent:
        weight = word_frequencies.get(token.text.lower())
        if weight is not None:
            sentence_scores[sent] = sentence_scores.get(sent, 0) + weight

select_length = int(len(sentence_tokens) * summary_percentage)
# A short transcript with a small fraction truncates to zero sentences; keep
# at least one so the summary is not silently empty.
if sentence_scores and summary_percentage > 0:
    select_length = max(select_length, 1)
top_sentences = nlargest(select_length, sentence_scores, key=sentence_scores.get)
# Sentences are emitted best-scoring first and separated by a space; joining
# with '' ran adjacent sentences together.
summary = ' '.join(sent.text.strip() for sent in top_sentences)
# Save Summary
# With --append the summary is added to the end of an existing file;
# otherwise any existing file is replaced. The with-block closes the file
# even if the write fails.
output_mode = 'a' if arguments.append else 'w'
with open(arguments.output, output_mode, encoding='utf-8') as output_file:
    output_file.write(summary)
# Remove Unsaved Files
# When --save-audio was not given, the extracted audio was written to a
# throwaway file, so delete it now.
if arguments.faudio is None:
    os.remove(audio_file_name)
# Explicit success status. SystemExit is raised directly because the exit()
# builtin is installed by the site module and is not guaranteed to exist.
raise SystemExit(0)