# Pipeline: take D&D session recordings, convert them to wav, diarize speakers with
# pyannote, transcribe each speaker turn with Whisper, and summarize the transcript with GPT.

import openai, whisper, torch
import re, json, os, time
from google.oauth2 import service_account
# MediaFileUpload is only needed if the commented-out upload block below is re-enabled
from googleapiclient.http import MediaIoBaseDownload, MediaFileUpload
from pyannote.audio import Pipeline
from pydub import AudioSegment
from googleapiclient.discovery import build
import io
from pyannote.audio.pipelines.utils.hook import ProgressHook
import logging

logging.basicConfig(level=logging.INFO)


def remove_consecutive_speakers(speaker_dict):
    """Drop entries where the same speaker appears in consecutive turns."""
    result = {}
    previous_speaker = None
    for timestamp, speaker in speaker_dict.items():
        if speaker != previous_speaker:
            result[timestamp] = speaker
            previous_speaker = speaker
    return result


def read_file_to_comma_separated_string(file_path):
    """Reads a file line by line and returns a comma-separated string of its contents."""
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            # Read lines, strip whitespace, and filter out empty lines
            lines = [line.strip() for line in file if line.strip()]
            return ",".join(lines)
    except FileNotFoundError:
        return f"Error: The file '{file_path}' was not found."
    except IOError as e:
        return f"Error reading the file: {e}"


def response_gpt(role, input, model="gpt-4.1-2025-04-14"):
    """Send a system role + user prompt to the OpenAI chat API and return the reply text."""
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": role},
            {"role": "user", "content": input}],
        model=model,
        temperature=0.6,
        frequency_penalty=1,
        presence_penalty=0.8
    )
    return chat_completion.choices[0].message.content


def get_local_file_list(folder_path):
    """Return list of files in a local folder"""
    return [
        {"name": f, "path": os.path.join(folder_path, f)}
        for f in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, f))
    ]


def get_file_contents(file_path):
    """Return file contents as a list of lines"""
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read().splitlines()


def get_gdrive_file_list(folder_id, service):
    """Return list of files in a Google Drive folder"""
    query = f"'{folder_id}' in parents"
    results = service.files().list(
        q=query, fields="files(id, name, mimeType)"
    ).execute()
    return results.get("files", [])


def get_gdrive_file(file_id, file_name):
    """Download a Google Drive file into the temporary directory."""
    request = GOOGLE_SERVICE.files().get_media(fileId=file_id)
    fh = io.FileIO(FILE_LOCS['TMP_DIR'] + "\\" + file_name, 'wb')
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        status, done = downloader.next_chunk()
        print(f"Download progress: {int(status.progress() * 100)}%")


def get_file_list(path_or_id):
    """Resolve a local path to its file list or contents, otherwise treat it as a Drive folder ID."""
    if os.path.exists(path_or_id):
        if os.path.isdir(path_or_id):
            return get_local_file_list(path_or_id)
        else:
            return get_file_contents(path_or_id)
    elif GOOGLE_SERVICE:
        return get_gdrive_file_list(path_or_id, GOOGLE_SERVICE)
    else:
        raise ValueError(
            f"'{path_or_id}' not found locally and no Google Drive service provided."
        )


# Constants
with open('[path to the file that holds all the API keys]', 'r') as f:
    data = json.load(f)

API_KEY = data['OpenAI']
DISCORD_KEY = data['Discord']
SCOPES = ['https://www.googleapis.com/auth/documents.readonly',
          'https://www.googleapis.com/auth/drive']
SERVICE_ACCOUNT_INFO = service_account.Credentials.from_service_account_info(
    data['Google'], scopes=SCOPES
)
GOOGLE_SERVICE = build('drive', 'v3', credentials=SERVICE_ACCOUNT_INFO)

FILE_LOCS = {
    'TMP_DIR': [temporary local directory],
    'Transcripts': [directory where transcripts are stored],
    'Summaries': [directory where summaries are stored],
    'AudioMP4': [directory with the game recordings in mp4; if you record the games straight to wav, this can be left empty ('')],
    'Audio.WAV': [directory where the wav files will be kept; the script does not delete them, so clean them up afterwards],
    'Names List': [list of names of important people and places; transcription handles proper nouns poorly, and this list helps ChatGPT correct them, though not perfectly. Can be left empty ('')],
}

# Default role/prompt (not used below; the loop builds its own role and prompt inline)
ROLE = 'You are asked to accurately summarize the transcript of a D&D game'
PROMPT = """Please summarize the following transcript. List all the events that happened and all the decisions made by speakers and/or characters. Be more specific, including names and locations. Some locations and names may be misspelled, use the list in Players.txt to correct spelling to the next best one"""

# Initializing models
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1",
                                    use_auth_token=data['HUGGINGFACE'])
pipeline = pipeline.to(torch.device("cuda"))
model = whisper.load_model("small", device='cuda')
client = openai.OpenAI(api_key=API_KEY)

files_dict = {k: get_file_list(v) for k, v in FILE_LOCS.items()}

for file in files_dict['AudioMP4']:
    # Skip sessions numbered below 70
    if int(re.findall(r'\d+', file['name'][:-3])[0]) < 70:
        continue

    language_name = ''
    language = 'en'  # default language
    timestamps = {}
    print(file)
    start_time = time.time()
    file_name_base = file['name'][:-3]  # strips the 3-letter extension, keeps the trailing dot

    # Convert the recording to wav
    audio_path = file['path']
    wav_path = FILE_LOCS['Audio.WAV'] + '\\' + file_name_base + 'wav'
    audio_file = open(audio_path, "rb")  # unused handle, kept from the original
    if os.path.isfile(wav_path):
        print('wav file exists')
    else:
        print('Format Change')
        try:
            audio = AudioSegment.from_file(audio_path, format="m4a")
        except Exception:
            print(file_name_base, ' cannot be read')
            continue
        audio.export(wav_path, format="wav")
    audio_wav = AudioSegment.from_file(wav_path, format="wav")

    # Transcribe & diarize
    if file_name_base in [f['name'][:-3] for f in files_dict['Transcripts']]:
        print('Transcript Exists')
        # download the existing transcript from Google Drive
        file_id = [f["id"] for f in files_dict['Transcripts']
                   if f["name"] == file_name_base + 'txt'][0]
        get_gdrive_file(file_id, file_name_base + 'txt')
        with open(FILE_LOCS['TMP_DIR'] + "\\" + file_name_base + 'txt',
                  "r+", encoding="utf-8") as transcript:
            full_transcript = transcript.read()
    else:
        # Detect the language from the first 30-second chunk
        first_segment = whisper.pad_or_trim(whisper.load_audio(wav_path))
        _, probs = model.detect_language(
            whisper.log_mel_spectrogram(first_segment).to(model.device))
        language = max(probs, key=probs.get)
        if language == 'en':
            language_name = 'English'
        elif language == 'ru':
            language_name = 'Russian'

        if language_name == 'Russian':
            print('Not ENG party')
        else:
            print("Diarizing")
            with ProgressHook() as hook:
                diarization = pipeline(wav_path, min_speakers=4, max_speakers=5, hook=hook)
            for turn, _, speaker in diarization.itertracks(yield_label=True):
                timestamps[turn.start] = speaker
            timestamps = remove_consecutive_speakers(timestamps)

            prev_timestamp = 0
            print("Transcribing")
            with open(FILE_LOCS['TMP_DIR'] + "\\" + file_name_base + 'txt',
                      "w+", encoding="utf-8") as f:
                for timestamp, speaker in timestamps.items():
                    print('---' + str(prev_timestamp))
                    # Cut the audio between consecutive speaker turns; clamp the start so the
                    # first slice does not wrap around to the end of the recording
                    segment = audio_wav[max(0, prev_timestamp * 1000 - 100):timestamp * 1000]
                    segment.export(FILE_LOCS['TMP_DIR'] + '\\' + file_name_base + '_segment.wav',
                                   format="wav")
                    transcript = model.transcribe(
                        FILE_LOCS['TMP_DIR'] + '\\' + file_name_base + '_segment.wav',
                        language=language_name)
                    f.write('\n' + speaker + ':\n' + transcript['text'])
                    prev_timestamp = timestamp

            # file = GOOGLE_SERVICE.files().create(
            #     body={
            #         'name': file_name_base + 'txt',
            #         'parents': [FILE_LOCS['Transcripts']]
            #     },
            #     media_body=MediaFileUpload(FILE_LOCS['TMP_DIR'] + "\\" + file_name_base + 'txt',
            #                                resumable=True),
            #     fields='id, name'
            # ).execute()

    # Summarize
    if file_name_base + '_summary_' + language + '_.txt' in [f['name'] for f in files_dict['Summaries']]:
        print('Summary exists')
    else:
        if language_name == '':
            # Language was not detected above (transcript already existed), so detect it now
            first_segment = whisper.pad_or_trim(whisper.load_audio(wav_path))
            _, probs = model.detect_language(
                whisper.log_mel_spectrogram(first_segment).to(model.device))
            language = max(probs, key=probs.get)
            if language == 'en':
                language_name = 'English'
            elif language == 'ru':
                language_name = 'Russian'

        if language_name == 'Russian':
            print('Not ENG party')
        else:
            with open(FILE_LOCS['TMP_DIR'] + "\\" + file_name_base + 'txt',
                      "r+", encoding="utf-8") as transcript:
                full_transcript = transcript.read()
            print('Summarizing')
            prompt = """Please summarize the following transcript of a D&D game. List all the events that happened and all the decisions made by characters. Be more specific. Identify speakers with characters, and indicate which characters were involved with or affected by which events and which locations were involved. """
            if language == 'en':
                prompt = prompt + """Some names may be misspelled. Here's a partial list of names to help you correct potential misspellings:""" \
                    + ';'.join(get_file_contents(FILE_LOCS['Names List']))
            prompt = prompt + '\n TRANSCRIPT:' + full_transcript
            summary = response_gpt(
                role='Your job is to summarize the transcript into convenient notes. You will be rewarded if you do it well',
                input=prompt
            )
            with open(FILE_LOCS['TMP_DIR'] + "\\" + file_name_base + '_summary_' + language + '_.txt',
                      'w', errors='replace') as summary_file:
                summary_file.write(summary)