"""Summarize a meeting recording.

Pipeline: extract the audio track with ffmpeg, transcribe it with Whisper,
then build an extractive summary by ranking spaCy sentences on normalized
word frequency and keeping the highest-scoring ones.
"""
from argparse import ArgumentParser
from collections import Counter
from heapq import nlargest
import os
from string import punctuation
import sys
from uuid import uuid4

import ffmpeg
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from spacy_download import load_spacy
import whisper


def _word_scores(doc):
    """Return ``{lowercased word: frequency / max frequency}`` for *doc*.

    Whitespace tokens, punctuation and stop words are ignored. Keys are
    lowercased so that sentence scoring (which also lowercases) finds them.
    Returns an empty dict when no token qualifies.
    """
    counts = Counter()
    for token in doc:
        # The transcript joins segments with '\n'; those whitespace tokens
        # must not be counted as words or they dominate the normalization.
        if token.is_space or token.is_punct:
            continue
        lowered = token.text.lower()
        if lowered in STOP_WORDS or lowered in punctuation:
            continue
        counts[lowered] += 1

    if not counts:
        return {}
    highest = max(counts.values())
    return {word: count / highest for word, count in counts.items()}


def _summarize(doc, fraction):
    """Return an extractive summary of *doc* as a single string.

    Each sentence is scored as the sum of the normalized frequencies of the
    words it contains; the top ``fraction`` of sentences (at least one) are
    kept, highest score first, and joined with single spaces. Returns an
    empty string when nothing in *doc* can be scored.
    """
    scores = _word_scores(doc)
    sentences = list(doc.sents)

    ranked = []
    for index, sentence in enumerate(sentences):
        hits = [
            scores[token.text.lower()]
            for token in sentence
            if token.text.lower() in scores
        ]
        if hits:
            ranked.append((sum(hits), index))
    if not ranked:
        return ''

    # Short meetings would otherwise round down to zero selected sentences.
    keep = max(1, int(len(sentences) * fraction))
    best = nlargest(keep, ranked, key=lambda pair: pair[0])
    # Collapse internal whitespace: segment breaks ('\n') fall mid-sentence.
    return ' '.join(' '.join(sentences[index].text.split()) for _, index in best)


# TODO: Verbose debugging option (-v / --verbose)
parser = ArgumentParser(description='Summarize a meeting')
parser.add_argument('-i', '--input', help='Input video file', required=True, dest='input')
parser.add_argument('-t', '--save-transcript', help='Output text transcription to file', dest='ftranscript')
parser.add_argument('-a', '--save-audio', help='Save audio file', dest='faudio')
parser.add_argument('-o', '--output', help='Output summary to file', required=True, dest='output')
parser.add_argument('-c', '--append', help='Append to output file', action='store_true', dest='append')
parser.add_argument('-m', '--transcription-model', help='Whisper model to use', dest='model',
                    default='medium.en')
parser.add_argument('-s', '--summarization-model', help='Summarization model to use',
                    dest='summarization_model', default='en_core_web_lg')
parser.add_argument('-p', '--summarization-percentage',
                    help='Fraction of sentences to keep in the summary (0-1)',
                    dest='summarization_percentage', type=float, default=0.0125)
arguments = parser.parse_args()

# Validate the input before touching any file on disk.
if not os.path.isfile(arguments.input):
    print('Input must be a valid file', file=sys.stderr)
    sys.exit(1)

# Strip Audio from input
keep_audio = arguments.faudio is not None
audio_file_name = arguments.faudio if keep_audio else str(uuid4()) + '.wav'
if os.path.exists(audio_file_name):
    # Never delete the input just because it was also named as audio output.
    if os.path.samefile(audio_file_name, arguments.input):
        print('Audio output must not be the input file', file=sys.stderr)
        sys.exit(1)
    os.remove(audio_file_name)

try:
    ffmpeg.input(arguments.input).output(audio_file_name).run()

    # Transcribe Audio
    transcription_model = whisper.load_model(arguments.model)
    transcription = transcription_model.transcribe(audio_file_name)
finally:
    # Remove the temporary audio file even when extraction/transcription fails.
    if not keep_audio and os.path.exists(audio_file_name):
        os.remove(audio_file_name)

transcription_text = ''.join(
    segment["text"] + '\n' for segment in transcription["segments"]
)

if arguments.ftranscript is not None:
    with open(arguments.ftranscript, 'w', encoding='utf-8') as transcription_file:
        transcription_file.write(transcription_text)

# Summarize Text
nlp = load_spacy(arguments.summarization_model)
summary = _summarize(nlp(transcription_text), arguments.summarization_percentage)

# Save Summary
output_mode = 'a' if arguments.append else 'w'
with open(arguments.output, output_mode, encoding='utf-8') as output_file:
    # Trailing newline keeps successive appended summaries separated.
    output_file.write(summary + '\n')

sys.exit(0)