101 lines
4.0 KiB
Python
101 lines
4.0 KiB
Python
|
from argparse import ArgumentParser
|
||
|
from uuid import uuid4
|
||
|
import os
|
||
|
import ffmpeg
|
||
|
import whisper
|
||
|
import spacy
|
||
|
from spacy.lang.en.stop_words import STOP_WORDS
|
||
|
from spacy_download import load_spacy
|
||
|
from string import punctuation
|
||
|
from heapq import nlargest
|
||
|
|
||
|
# TODO: Verbose debugging option
|
||
|
|
||
|
# Command-line interface.
# The model options default to None here; the fallback model names are
# applied further down, where each option is consumed.
parser = ArgumentParser(description='Summarize a meeting')
parser.add_argument('-i', '--input', help='Input video file', required=True, dest='input')
parser.add_argument('-t', '--save-transcript', help='Output text transcription to file', dest='ftranscript')
parser.add_argument('-a', '--save-audio', help='Save audio file', dest='faudio')
parser.add_argument('-o', '--output', help='Output summary to file', required=True, dest='output')
parser.add_argument('-c', '--append', help='Append to output file', action='store_true', dest='append')
# parser.add_argument('-v', '--verbose', help='Verbose output', action='store_true', dest='verbose')
parser.add_argument('-m', '--transcription-model', help='Whisper model to use (default: medium.en)', dest='model')
parser.add_argument('-s', '--summarization-model', help='Summarization model to use (default: en_core_web_lg)', dest='summarization_model')
# Despite the option name, the value is used as a multiplier on the number
# of sentences, i.e. it is a fraction rather than a 0-100 percentage.
parser.add_argument(
    '-p', '--summarization-percentage',
    help='Fraction of sentences to keep in the summary, between 0 and 1 (default: %(default)s)',
    dest='summarization_percentage',
    type=float,
    default=0.0125,
)
arguments = parser.parse_args()

# Reject values that cannot select a meaningful share of the sentences
# (zero, negatives, NaN, more than everything). parser.error() prints the
# usage text and exits with status 2.
if not 0 < arguments.summarization_percentage <= 1:
    parser.error('summarization percentage must be a fraction in the range (0, 1]')
|
||
|
|
||
|
# Strip Audio from input
# Validate the input before touching any output path, so a bad invocation
# never deletes or overwrites an existing file.
if not os.path.isfile(arguments.input):
    # SystemExit with a string prints it to stderr and exits with status 1.
    raise SystemExit('Input must be a valid file')

# Use the requested audio path when one is given; otherwise extract to a
# uniquely named WAV file in the working directory.
if arguments.faudio is not None:
    audio_file_name = arguments.faudio
else:
    audio_file_name = str(uuid4()) + '.wav'

# Writing the audio on top of the input would destroy the input.
if os.path.exists(audio_file_name) and os.path.samefile(audio_file_name, arguments.input):
    raise SystemExit('Audio output must not be the same file as the input')

# overwrite_output=True lets ffmpeg replace an existing file itself instead
# of deleting it up front.
ffmpeg.input(arguments.input).output(audio_file_name).run(overwrite_output=True)
|
||
|
|
||
|
# Transcribe Audio
# Use the model named on the command line, or 'medium.en' when none is given.
transcription_model = 'medium.en' if arguments.model is None else arguments.model
transcription_model = whisper.load_model(transcription_model)
transcription = transcription_model.transcribe(audio_file_name)

# One transcription segment per line; every line, including the last,
# ends with a newline.
transcription_text = ''.join(
    segment['text'] + '\n' for segment in transcription['segments']
)

if arguments.ftranscript is not None:
    # Mode 'w' replaces the content of an existing file, and the context
    # manager closes the handle even if the write fails. UTF-8 keeps
    # non-ASCII text writable regardless of the platform's locale encoding.
    with open(arguments.ftranscript, 'w', encoding='utf-8') as transcription_file:
        transcription_file.write(transcription_text)
|
||
|
|
||
|
# Summarize Text
# Extractive summary: score each sentence by the normalized frequencies of
# the words it contains and keep the highest-scoring sentences.
summary_percentage = arguments.summarization_percentage
if summary_percentage is None:
    summary_percentage = 0.0125

summary_model = arguments.summarization_model
if summary_model is None:
    summary_model = 'en_core_web_lg'

# nlp = spacy.load(summary_model)
nlp = load_spacy(summary_model)
doc = nlp(transcription_text)

# Count words case-insensitively. The keys must be lower-cased because the
# sentence scoring below looks words up by their lower-cased text.
word_frequencies = {}
for token in doc:
    # The transcript has a newline after every segment; skip whitespace and
    # punctuation tokens so they are not counted as words.
    if token.is_space or token.is_punct:
        continue
    word = token.text.lower()
    if word in STOP_WORDS or word in punctuation:
        continue
    word_frequencies[word] = word_frequencies.get(word, 0) + 1

# Normalize counts to the range (0, 1]. The guard avoids calling max() on
# an empty collection when nothing was counted.
if word_frequencies:
    max_frequency = max(word_frequencies.values())
    word_frequencies = {
        word: count / max_frequency for word, count in word_frequencies.items()
    }

# A sentence's score is the sum of the weights of its counted words;
# sentences without any counted word get no entry at all.
sentence_tokens = list(doc.sents)
sentence_scores = {}
for sent in sentence_tokens:
    for token in sent:
        weight = word_frequencies.get(token.text.lower())
        if weight is not None:
            sentence_scores[sent] = sentence_scores.get(sent, 0) + weight

# Keep the requested share of sentences, but at least one whenever any
# sentence was scored, so short transcripts do not yield an empty summary.
select_length = int(len(sentence_tokens) * summary_percentage)
if sentence_scores:
    select_length = max(1, select_length)

# Sentences are emitted in descending score order. Whitespace inside each
# sentence is collapsed so the sentences can be joined with single spaces.
top_sentences = nlargest(select_length, sentence_scores, key=sentence_scores.get)
summary = ' '.join(' '.join(sent.text.split()) for sent in top_sentences)
if summary:
    # End with a newline so successive appended summaries stay separated.
    summary += '\n'
|
||
|
|
||
|
# Save Summary
# Append only when requested; otherwise mode 'w' replaces the content of an
# existing output file. The context manager closes the handle even if the
# write fails.
output_mode = 'a' if arguments.append else 'w'
with open(arguments.output, output_mode, encoding='utf-8') as output_file:
    output_file.write(summary)
|
||
|
|
||
|
# Remove Unsaved Files
# The extracted audio is only kept when the user asked for it via --save-audio.
if arguments.faudio is None:
    os.remove(audio_file_name)

# Raise SystemExit directly instead of calling exit(), which is a helper
# added by the site module and is not guaranteed to be defined.
raise SystemExit(0)
|