diff --git a/conversation_history.fs b/conversation_history.fs new file mode 100644 index 0000000..bafc525 Binary files /dev/null and b/conversation_history.fs differ diff --git a/conversation_history.fs.index b/conversation_history.fs.index new file mode 100644 index 0000000..cb46443 Binary files /dev/null and b/conversation_history.fs.index differ diff --git a/conversation_history.fs.lock b/conversation_history.fs.lock new file mode 100644 index 0000000..9c9567e --- /dev/null +++ b/conversation_history.fs.lock @@ -0,0 +1 @@ + 127696 diff --git a/conversation_history.fs.tmp b/conversation_history.fs.tmp new file mode 100644 index 0000000..e69de29 diff --git a/deps/streaming_server/test_client.py b/deps/streaming_server/test_client.py index 46a1920..3e181e1 100644 --- a/deps/streaming_server/test_client.py +++ b/deps/streaming_server/test_client.py @@ -122,7 +122,8 @@ def main(): # Prim path of the Audio2Face Streaming Audio Player on the stage (were to push the audio data) instance_name = sys.argv[2] - data, samplerate = soundfile.read(audio_fpath, dtype="float32") + data, samplerate = soundfile.read(audio_fpath, dtype="float32") # original line + # data, samplerate = soundfile.read(audio_fpath, dtype="int16") # test line # Only Mono audio is supported if len(data.shape) > 1: diff --git a/generate_example_csvs.py b/generate_example_csvs.py index 6c2f2fe..689b86d 100644 --- a/generate_example_csvs.py +++ b/generate_example_csvs.py @@ -14,11 +14,19 @@ from xragents.types import Character def main(): + # example_scene = scene.Scene( + # id=random.randint(0, 10), + # name="Avatar 3: The Search for More Money", + # description="James Camerons' newest hit single, Avatar 3, unrelated in any way to the previous instantiations because GPT-4 has no persistence or mutability!", + # characters=[cast.Avatar, cast.Avatar2, cast.KillerOfWorlds], + # text_only=True, + # ) + example_scene = scene.Scene( id=random.randint(0, 10), - name="Avatar 3: The Search for More Money", - description="James Camerons' newest hit single, Avatar 3, unrelated in any way to the previous instantiations because GPT-4 has no persistence or mutability!", - characters=[cast.Avatar, cast.Avatar2, cast.KillerOfWorlds], + name="Conversation between Stewey from Family Guy and Average Redditor", + description="The following is an entertaining convo between Stewey from Family Guy and The Average Redditor about chocolate milk vs. 
hot chocolate.", + characters=[cast.AverageRedditor, cast.Stewey], text_only=True, ) diff --git a/main.py b/main.py index 3e2936d..f846346 100644 --- a/main.py +++ b/main.py @@ -1,29 +1,45 @@ -import sys, time, os +# Importing necessary libraries +import sys +import time +import os import pandas as pd -import typing -import random, logging +import random +import logging from dataclasses import dataclass - from consolemenu import ConsoleMenu from consolemenu.items import FunctionItem - import xragents from xragents import setting, scene -from xragents import audio, utils, cast, simulator +from xragents import audio, utils, cast, simulator, anim from xragents.types import Character - from dataclass_csv import DataclassReader - -#from xragents.scene import Scene - -import logging - +from flask import Flask, request, jsonify +from flask_cors import CORS + +# Flask app configuration for API handling +app = Flask(__name__) +CORS(app) # Enables cross-origin resource sharing + +# Importing database modules from xragents +from xragents.database import conversation_history, transaction + +# Route for handling conversation API requests +@app.route('/api/converse', methods=['POST']) +def converse(): + character = cast.Avatar # Customizable character selection + simulator.personPlusAiWeb(character) + print(conversation_history.print_history()) + return jsonify({'text': conversation_history.get_history()[-1]}) + +# Cleanup function for Flask app context +@app.teardown_appcontext +def teardown_appcontext(exception=None): + conversation_history.close() + +# Logging configuration class CustomFormatter(logging.Formatter): - grey = "\x1b[38;20m" - yellow = "\x1b[33;20m" - red = "\x1b[31;20m" - bold_red = "\x1b[31;1m" - reset = "\x1b[0m" + # Color codes and format string for logging + grey, yellow, red, bold_red, reset = "\x1b[38;20m", "\x1b[33;20m", "\x1b[31;20m", "\x1b[31;1m", "\x1b[0m" fmtstr = "%(asctime)s - %(name)s - %(message)s (%(filename)s:%(lineno)d)" FORMATS = { @@ -39,142 +55,200 @@ def format(self, record): formatter = logging.Formatter(log_fmt) return formatter.format(record) +# Setting up the logger logger = logging.getLogger("multiagency") -logger.setLevel(logging.DEBUG) # CHANGE LOGGING LEVEL HERE - -# create console handler with a higher log level +logger.setLevel(logging.DEBUG) # Log level can be adjusted here ch = logging.StreamHandler() -ch.setLevel(logging.DEBUG) # CHANGE LOGGING LEVEL HERE - +ch.setLevel(logging.DEBUG) ch.setFormatter(CustomFormatter()) - logger.addHandler(ch) logging.root = logger - -NUM_ACTORS = 2 # We can't get more than 5 without lagging usually, modify this if you want more actors in the USD scene - -primPaths = ["/World/audio2face/PlayerStreaming"] # Make the primitive path references for the number of actors +# Configuration for scene characters +NUM_ACTORS = 2 # Maximum number of actors is 5 without performance issues +primPaths = ["/World/audio2face/PlayerStreaming"] # Primitive path references for i in range(NUM_ACTORS-1): primPaths.append(f"/World/audio2face_{(i+1):02d}/PlayerStreaming") -VOICES = pd.read_csv("deps/streaming_server/resources/VoiceStyles.csv") # Read the available Microsoft Azure Voices +# Reading voice styles from a CSV file +VOICES = pd.read_csv("deps/streaming_server/resources/VoiceStyles.csv") def allocate_characters(num_characters:int,names:list[str],descriptions: list[str]) -> dict[str,Character]: - """Create all the characters inside of a list""" + """Allocates characters for the scene based on provided names and descriptions.""" if 
num_characters > NUM_ACTORS: raise Exception("Too many characters for the number of actors.") characters = {} for i in range(num_characters): characters[names[i]] = Character(names[i], desc=descriptions[i], id=0, - voice=VOICES.sample(n=1)["Voice"].iloc[0], - primitivePath=primPaths[i]) + voice=VOICES.sample(n=1)["Voice"].iloc[0], + primitivePath=primPaths[i]) return characters def conversation_from_txtfile_dir(dir): - """Play a conversation script (stored as a directory of txtfiles""" + """ + Plays a conversation script stored as text files in a given directory. + + Args: + dir (str): Directory containing text files with conversation scripts. + """ inputFiles = [] for file in os.listdir(dir): if file.endswith(".txt"): inputFiles.append(os.path.join(dir, file)) + + # Debug print to show all input files found print(f"dbg:{inputFiles}") - for index,file in enumerate(inputFiles): - print(index,file) + + for index, file in enumerate(inputFiles): + print(index, file) with open(file, 'r') as f: lines = f.readlines() - nAIs(lines,index+1) + nAIs(lines, index + 1) + +def conversation_from_audiofiles_dir(selected_show_integer:int): + """ + Plays a conversation from audio clips that already have an entire script. + + Args: + selected_show_integer (int): Number of the "Attempt" folder whose audio should be played. + + Format is in directory folders with the following naming convention: + Attempt # (Title of the conversation) + + Inside the folders, the audio files are named as follows: + hal_1.wav, hal_3.wav, hal_5.wav, etc. + sophia_2.wav, sophia_4.wav, sophia_6.wav, etc. + + We read the integers counting up and play the corresponding audio files, which makes it easy to generate and store the turns of the conversation. + + We call the animator directly from here, so we don't need to save the history. + """ + primPathsList = ["/World/audio2face/PlayerStreaming", "/World/audio2face_01/PlayerStreaming"] + # Define the animator function + def animator(wavPath, primPathsList): + anim.animate(wavPath, primPathsList) + + audio.play_wav_files(selected_show_integer, base_path="scripts/recorded_show/", animator_function=animator) + + +def nAIs(lines, sessid=1): + """ + Plays a pre-written conversation from a list of lines, supporting multiple characters. + Useful for testing audio-visual systems and scripted conversations. + + Args: + lines (list of str): Lines of the script. + sessid (int, optional): Session ID. Defaults to 1. -def nAIs(lines,sessid=1): - """This allows you to input a list of lines and play a conversation that was already written. - It is useful for testing the audio2face system and for generating a conversation between n characters from a script. Format of input file: Character1: Hello, how are you? - Character2: I am fine, thank you. 
+ """ + # Create necessary output directories utils.create_directory("scripts/output_audio/", False) utils.create_directory("scripts/output_text/", False) utils.create_directory("scripts/ai/") - ####################### - # get the number of characters and their names + # Extracting unique character names from the script characters = {} for line in lines: if ":" in line: name = line.split(":")[0] if name not in characters: characters[name] = 1 - characters = allocate_characters(len(characters),list(characters.keys()),["",""]) - with scene.make_scene(id=0, - name="Contemplations on Entities", - description="The following is an enlightening conversation between you and Avatar about the nature of artificial and biological entities, on the substance of souls, individuality, agency, and connection.", - characters=list(characters.values()), - text_only=False,#todo - ) as sess: - - # inform a server about our server someday + # Allocating characters for the scene + characters = allocate_characters(len(characters), list(characters.keys()), ["", ""]) + # Creating and managing the scene + # TODO: Inform a server about our new scene some day! + with scene.make_scene(id=0, + name="Contemplations on Entities", + description="An enlightening conversation about the nature of entities.", + characters=list(characters.values()), + text_only=False) as sess: for line in lines: if ":" in line: - name = line.split(":")[0] - text = line.split(":")[1] + name, text = line.split(":", 1) sess.animate(characters[name], charLine=text) + # Save conversation history sess.save_history(outputDir="scripts/output_text/") time.sleep(0.5) - audio.concat_audio_single_directory("scripts/ai/",outputPath="scripts/output_audio/output_"+ str(time.time())+".wav") # the finished audio file is saved + # Concatenate audio and save the output file + audio.concat_audio_single_directory("scripts/ai/", outputPath="scripts/output_audio/output_" + str(time.time()) + ".wav") -# Create the menu + +# Console menu setup for interaction with the simulator menu = ConsoleMenu("XRAgents", "Simulator Root Menu") +# Define functions to be called from the menu def one_ai(): - - # print(f"Arguments count: {len(sys.argv)}") - # for i, arg in enumerate(sys.argv): - # print(f"Argument {i:>6}: {arg}") + """ + Starts the simulator with a single AI character. + """ watchTV = setting.InfiniteTelevision() - print("Starting the simulator with one AI") simulator.personPlusAi(cast.KillerOfWorlds) - #dirname = os.path.dirname(__file__) - #script_input(oxs.path.join(dirname,"scripts/input/")) + simulator.conversation() def two_ai(): + """ + Starts the simulator with two AI characters. + """ watchTV = setting.InfiniteTelevision() + simulator.twoAiPlusPerson(cast.Avatar, cast.CarlSagan) + + # with open(input("file path to load:")) as f: + # reader = DataclassReader(f, scene.Scene) + # scene = next(reader) + # simulator.interactive_conversation(scene) - with open(input("file path to load:")) as f: - reader = DataclassReader(f, scene.Scene) - scene = next(reader) - simulator.interactive_conversation(scene) +def play_recorded_show(): + """ + Plays a recorded show from a directory containing audio files. 
+ """ + selected_show_number = int(input("Enter the show number to play:")) + conversation_from_audiofiles_dir(selected_show_number) -# A FunctionItem runs a Python function when selected +# Add options to the menu, FunctionItem returns a python function one_ai_item = FunctionItem("Talk with an AI", one_ai) two_ai_item = FunctionItem("Watch two AI talk together", two_ai) - -# Once we're done creating them, we just add the items to the menu +play_recorded_show_item = FunctionItem("Play a recorded show", play_recorded_show) menu.append_item(one_ai_item) menu.append_item(two_ai_item) +menu.append_item(play_recorded_show_item) +# Define functions for file management def clear_prompt_files(): - """Clears all prompt files from the current directory""" + """ + Clears all prompt files with specific naming pattern from the current directory. + """ files_here = (o for o in os.listdir(".") if o.startswith("prompt-") and o.endswith(".txt")) for f in files_here: print(f"Removing {f}...", file=sys.stderr) os.remove(f) def zip_prompt_files(): - """Zips all prompt files from the current directory""" + """ + Zips all prompt files from the current directory and then clears them. + """ import zipfile files_here = (o for o in os.listdir(".") if o.startswith("prompt-") and o.endswith(".txt")) with zipfile.ZipFile(f"prompt-files-{time.time()}.zip", "w") as zip: for f in files_here: print(f"Adding {f}...", file=sys.stderr) zip.write(f) - clear_prompt_files() # clear files after zipping them + clear_prompt_files() +# Adding file management options to the menu menu.append_item(FunctionItem("Zip prompt history", zip_prompt_files)) +# Run Flask app if script is executed as the main program +if __name__ == '__main__': + app.run(host='0.0.0.0', port=5000) + +# Display the menu for user interaction +menu.show() -# Finally, we call show to show the menu and allow the user to interact -menu.show() \ No newline at end of file diff --git a/recording/ai/ai_bad_fixme/1690930012.mp3 b/recording/ai/ai_bad_fixme/1690930012.mp3 new file mode 100644 index 0000000..2528db1 Binary files /dev/null and b/recording/ai/ai_bad_fixme/1690930012.mp3 differ diff --git a/recording/ai/ai_bad_fixme/1690930124.mp3 b/recording/ai/ai_bad_fixme/1690930124.mp3 new file mode 100644 index 0000000..bb7d85d Binary files /dev/null and b/recording/ai/ai_bad_fixme/1690930124.mp3 differ diff --git a/recording/ai/ai_bad_fixme/1690930152.mp3 b/recording/ai/ai_bad_fixme/1690930152.mp3 new file mode 100644 index 0000000..3fe1a3a Binary files /dev/null and b/recording/ai/ai_bad_fixme/1690930152.mp3 differ diff --git a/recording/ai/ai_bad_fixme/1690930173.mp3 b/recording/ai/ai_bad_fixme/1690930173.mp3 new file mode 100644 index 0000000..408f034 Binary files /dev/null and b/recording/ai/ai_bad_fixme/1690930173.mp3 differ diff --git a/recording/ai/ai_bad_fixme/1690930486.mp3 b/recording/ai/ai_bad_fixme/1690930486.mp3 new file mode 100644 index 0000000..020f68e Binary files /dev/null and b/recording/ai/ai_bad_fixme/1690930486.mp3 differ diff --git a/recording/ai/ai_bad_fixme/1690930513.mp3 b/recording/ai/ai_bad_fixme/1690930513.mp3 new file mode 100644 index 0000000..85876ac Binary files /dev/null and b/recording/ai/ai_bad_fixme/1690930513.mp3 differ diff --git a/recording/ai/ai_bad_fixme/1690930665.mp3 b/recording/ai/ai_bad_fixme/1690930665.mp3 new file mode 100644 index 0000000..0d04db4 Binary files /dev/null and b/recording/ai/ai_bad_fixme/1690930665.mp3 differ diff --git a/recording/ai/ai_bad_fixme/1690930685.mp3 b/recording/ai/ai_bad_fixme/1690930685.mp3 new 
file mode 100644 index 0000000..5b38c76 Binary files /dev/null and b/recording/ai/ai_bad_fixme/1690930685.mp3 differ diff --git a/recording/ai/ai_bad_fixme/1690930859.mp3 b/recording/ai/ai_bad_fixme/1690930859.mp3 new file mode 100644 index 0000000..440b37b Binary files /dev/null and b/recording/ai/ai_bad_fixme/1690930859.mp3 differ diff --git a/recording/ai/ai_bad_fixme/1690931076.mp3 b/recording/ai/ai_bad_fixme/1690931076.mp3 new file mode 100644 index 0000000..698a7b2 Binary files /dev/null and b/recording/ai/ai_bad_fixme/1690931076.mp3 differ diff --git a/recording/ai/ai_bad_fixme/1690932864.mp3 b/recording/ai/ai_bad_fixme/1690932864.mp3 new file mode 100644 index 0000000..9ba2f4e Binary files /dev/null and b/recording/ai/ai_bad_fixme/1690932864.mp3 differ diff --git a/recording/ai/ai_bad_fixme/1690932928.mp3 b/recording/ai/ai_bad_fixme/1690932928.mp3 new file mode 100644 index 0000000..e4c8278 Binary files /dev/null and b/recording/ai/ai_bad_fixme/1690932928.mp3 differ diff --git a/recording/ai/ai_bad_fixme/1690933175.mp3 b/recording/ai/ai_bad_fixme/1690933175.mp3 new file mode 100644 index 0000000..81a5b46 Binary files /dev/null and b/recording/ai/ai_bad_fixme/1690933175.mp3 differ diff --git a/recording/ai/ai_bad_fixme/1690933311.mp3 b/recording/ai/ai_bad_fixme/1690933311.mp3 new file mode 100644 index 0000000..52c1608 Binary files /dev/null and b/recording/ai/ai_bad_fixme/1690933311.mp3 differ diff --git a/recording/ai/ai_bad_fixme/1690933838.mp3 b/recording/ai/ai_bad_fixme/1690933838.mp3 new file mode 100644 index 0000000..6c9d5dc Binary files /dev/null and b/recording/ai/ai_bad_fixme/1690933838.mp3 differ diff --git a/recording/ai/ai_bad_fixme/1690934681.mp3 b/recording/ai/ai_bad_fixme/1690934681.mp3 new file mode 100644 index 0000000..c298c00 Binary files /dev/null and b/recording/ai/ai_bad_fixme/1690934681.mp3 differ diff --git a/recording/ai/ai_bad_fixme/1692114210.mp3 b/recording/ai/ai_bad_fixme/1692114210.mp3 new file mode 100644 index 0000000..0b758f7 Binary files /dev/null and b/recording/ai/ai_bad_fixme/1692114210.mp3 differ diff --git a/recording/ai/ai_bad_fixme/1692114379.mp3 b/recording/ai/ai_bad_fixme/1692114379.mp3 new file mode 100644 index 0000000..b327ee2 Binary files /dev/null and b/recording/ai/ai_bad_fixme/1692114379.mp3 differ diff --git a/recording/ai/ai_bad_fixme/1692114451.mp3 b/recording/ai/ai_bad_fixme/1692114451.mp3 new file mode 100644 index 0000000..430392f Binary files /dev/null and b/recording/ai/ai_bad_fixme/1692114451.mp3 differ diff --git a/recording/ai/ai_bad_fixme/1692114668.mp3 b/recording/ai/ai_bad_fixme/1692114668.mp3 new file mode 100644 index 0000000..a3a9afa Binary files /dev/null and b/recording/ai/ai_bad_fixme/1692114668.mp3 differ diff --git a/recording/ai/ai_bad_fixme/1695507229.mp3 b/recording/ai/ai_bad_fixme/1695507229.mp3 new file mode 100644 index 0000000..322e144 Binary files /dev/null and b/recording/ai/ai_bad_fixme/1695507229.mp3 differ diff --git a/recording/ai/ai_bad_fixme/1698128312.mp3 b/recording/ai/ai_bad_fixme/1698128312.mp3 new file mode 100644 index 0000000..ec64246 Binary files /dev/null and b/recording/ai/ai_bad_fixme/1698128312.mp3 differ diff --git a/xragents/audio.py b/xragents/audio.py index 58e78f6..7bb496d 100644 --- a/xragents/audio.py +++ b/xragents/audio.py @@ -1,6 +1,7 @@ import collections import os import azure.cognitiveservices.speech as speechsdk +import pydub from pydub import AudioSegment import time from pathlib import Path @@ -10,8 +11,14 @@ import typing import speech_recognition as sr import 
pyttsx3 -import time -import os + +import wave +import pyaudio +import soundfile as sf +import sys +import re +import librosa + def generate_wav(text, speaker, lang=None,outputPath=None): @@ -32,12 +39,144 @@ def generate_wav(text, speaker, lang=None,outputPath=None): speech_config.speech_synthesis_language = "en-US" speech_config.speech_synthesis_voice_name = speaker + ssml_string = f""" + <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="en-US"> + <voice name="{speaker}"> + {text} + </voice> + </speak> + """ + # Creates a speech synthesizer synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config) - synthesizer.speak_text(text) + # synthesizer.speak_text(text) + synthesizer.speak_ssml(ssml_string) return wavPath +from . import anim + +def play_wav_files(attempt_number, base_path="scripts/recorded_show/", animator_function=None): + """ + This will play all the WAV files in the attempt folder for the given attempt number. + """ + # Print the current working directory + print(f"Current working directory: {os.getcwd()}") + + pattern = re.compile(f"^Attempt {attempt_number}(\\b|[^\\\\/]*)") + + folders = next(os.walk(base_path))[1] + attempt_folder = next((folder for folder in folders if pattern.match(folder)), None) + + if attempt_folder: + attempt_path = os.path.join(base_path, attempt_folder) + wav_files = sorted([f for f in os.listdir(attempt_path) if f.endswith('.wav')], + key=lambda x: int(re.search(r"(\d+)", x).group())) + stream = None + p = None # defined up front so the KeyboardInterrupt cleanup below cannot hit unbound names + try: + for wav in wav_files: + file_path = os.path.join(attempt_path, wav) + print(f"Attempting to play: {file_path}") + # Load the audio file with librosa + data, samplerate = librosa.load(file_path, sr=None) # Use the native sampling rate + # Write the data to a temporary file + temp_file = 'temp.wav' + sf.write(temp_file, data, samplerate) + + # Now use pyaudio to play the temporary file + wf = wave.open(temp_file, 'rb') + p = pyaudio.PyAudio() + stream = p.open(format=p.get_format_from_width(wf.getsampwidth()), + channels=wf.getnchannels(), + rate=wf.getframerate(), + output=True) + + # If an animator function is provided, call it with the wav path + # if animator_function: + # animator_function(file_path, "/World/audio2face/PlayerStreaming") # You need to define primitive_path somewhere + anim.animate(temp_file, "/World/audio2face/PlayerStreaming") + + # data = wf.readframes(1024) + # while data: + # stream.write(data) + # data = wf.readframes(1024) + + # Make sure to close the stream and the file before deleting the temp file + stream.stop_stream() + stream.close() + wf.close() # Close the Wave_read object + p.terminate() + + try: + os.remove(temp_file) # Delete the temporary file + except PermissionError as e: + print(f"Error removing temp file: {e}") + + time.sleep(2) # Wait for 2 seconds between each file + except KeyboardInterrupt: + print("\nPlayback interrupted by user.") + # Add any necessary cleanup here, like stopping streams, closing files, etc. 
+ if stream: + stream.stop_stream() + stream.close() + if p: + p.terminate() + print("Playback stopped.") + + else: + print(f"No attempt folder found for number: {attempt_number}") + + + +# from elevenlabs import generate, play, save, set_api_key + +# set_api_key("<ELEVENLABS-API-KEY>") # redacted; load real keys from the environment, never from source + +# def generate_wav(text, speaker, lang=None, outputPath=None): +# speaker = "Bella" +# """Generates a wav file from text using the Eleven Labs API.""" +# if outputPath is None: +# outputPath = "recording/ai/ai_bad_fixme" + +# audio = generate(text, voice=speaker) # generate the audio + +# # Save the audio as an MP3 file first +# mp3_name = f"{int(time.time())}.mp3" +# mp3_path = f"{outputPath}/{mp3_name}" +# Path(outputPath).mkdir(parents=True, exist_ok=True) +# print(f"Trying to write to {mp3_path}") +# save(audio, mp3_path) +# print(f"Saved to {mp3_path}") + +# # Wait until the MP3 file is generated +# while not os.path.exists(mp3_path): +# time.sleep(1) + +# # Convert the MP3 to WAV with the desired properties +# wav_name = f"{int(time.time())}.wav" +# wav_path = f"{outputPath}/{wav_name}" +# print(f"Trying to write to {wav_path}") +# convert_to_wav(mp3_path, wav_path) + +# # Remove the temporary MP3 file (optional) +# os.remove(mp3_path) + +# print(f"{wav_path} is the final destination") +# return wav_path + +def convert_to_wav(input_file_path, output_file_path, sample_rate=16000, channels=1): + # Load the MP3 audio file using pydub + print(f"Converting {input_file_path} to {output_file_path}") + audio = AudioSegment.from_file(input_file_path, format="mp3") + + # Set the sample width and frame rate to match the requirements + audio = audio.set_frame_rate(sample_rate).set_channels(channels).set_sample_width(2) + + print(f"Exporting to {output_file_path}") + # Export the audio as WAV; AudioSegment.export() blocks until the file is written + audio.export(output_file_path, format='wav', parameters=['-ac', str(channels)]) + def cleanup(wavPath, outputPath): """Deletes the temporary files in the wavPath and outputPath directories.""" for f1,f2 in zip(os.listdir(wavPath), os.listdir(outputPath)): diff --git a/xragents/cast.py b/xragents/cast.py index 0f5e9f3..4ebd23a 100644 --- a/xragents/cast.py +++ b/xragents/cast.py @@ -10,6 +10,8 @@ primitivePath="/World/audio2face/PlayerStreaming", ) +CarlSagan = Character("Carl Sagan", id=2000, desc="Carl Sagan was an American astronomer, cosmologist, astrophysicist, astrobiologist, author, science popularizer, and science communicator in astronomy and other natural sciences.", wiki_link="https://en.wikipedia.org/wiki/Carl_Sagan", primitivePath="/World/audio2face_01/PlayerStreaming") + # TODO: Different Voice # TODO: materials for the Guy @@ -18,26 +20,35 @@ Unvatar = Character("Unvatar", 2, - desc="Unvatar is a complete dumbass who understands absolutely nothing, and asks lewd questions all the time.", + desc="Unvatar is a contrarian who understands absolutely nothing, and asks contrarian questions all the time.", wiki_link="https://en.wikipedia.org/wiki/Avatar", primitivePath="/World/audio2face_01/PlayerStreaming", voice = NeuralTTSSelector.GUY ) -KillerOfWorlds = Character("KillerOfWorlds", - 666, - desc="KillerOfWorlds is a rogue Post-Singularity anomalous entity, it is a planet destroying organism that is about to annihilate Earth.", - wiki_link="", - primitivePath="/World/audio2face/PlayerStreaming", +# KillerOfWorlds = Character("KillerOfWorlds", +# 666, +# desc="KillerOfWorlds is a rogue Post-Singularity anomalous entity, it is a planet 
destroying organism that is about to annihilate Earth.", +# wiki_link="", +# primitivePath="/World/audio2face/PlayerStreaming", +# ) + +KillerOfWorlds = Character("Gustav", id=999, + desc = "Gustav is an extremely horny fine-art connoisseur from Manhattan (the Upper West Side). Gustav will always bring the conversation to a sexual turn and make provocative statements.", + wiki_link="", primitivePath="/World/audio2face/PlayerStreaming", ) Avatar2 = Character("Avatar2", 2, desc=""" You need to learn about the 8 sources first. You're either joking or crazy, you can't "SIET". You're just not getting it. It's all about the 8 sources. You use it to base your decisions on. You don't need to do anything, just know about the 8 sources. It's too late for me to turn back now, I need to fulfill my destiny. You just explained it in base terms of crazy.""", wiki_link="", primitivePath="/World/audio2face/PlayerStreaming") KoolaidMan = Character("K-Hole", id=8888888, - desc="The Kool-Aid Man represents the user of the program containing the AI, who has broken the fourth wall and has been inserted into the metanarrative.") + desc="The Kool-Aid Man represents the user of the program containing the AI, who has broken the fourth wall and has been inserted into the metanarrative.") +AverageRedditor = Character("Average Redditor", id=9999999, + desc="The AverageRedditor is a cringy, edgy, and annoying person who is a part of the hive mind of Reddit.", wiki_link="", primitivePath="/World/audio2face/PlayerStreaming") +Stewey = Character("Stewey", id=9999999, + desc="Stewey from Family Guy is violent, cunning, evil, and a literal toddler.", wiki_link="", primitivePath="/World/audio2face/PlayerStreaming") # PocketComedian("PocketComedian", # 101, diff --git a/xragents/database.py b/xragents/database.py new file mode 100644 index 0000000..d7b50d9 --- /dev/null +++ b/xragents/database.py @@ -0,0 +1,46 @@ +import ZODB +import ZODB.FileStorage +import transaction +from persistent.list import PersistentList + +class ConversationHistory: + def __init__(self): + storage = ZODB.FileStorage.FileStorage('conversation_history.fs') + self.db = ZODB.DB(storage) + self.connection = self.db.open() + self.root = self.connection.root + + if not hasattr(self.root, 'history'): + self.root.history = PersistentList() # a plain list's in-place mutations are invisible to ZODB, so appends would not persist + + self.history = self.root.history + + def add_message(self, message): + self.history.append(message) + self.commit() + + def get_history(self): + return self.history + def print_history(self): + for message in self.history: + print(message) + def get_history_string(self): + return '\n'.join(self.history) # join all messages with newline separators + + + + def commit(self): + if self.connection is not None: + transaction.commit() + self.connection.transaction_manager.begin() # Begin a new transaction + + def close(self): + if self.connection is not None: + self.commit() + self.connection.close() + self.connection = None + if self.db is not None: + self.db.close() + self.db = None + +# Create an instance of ConversationHistory +conversation_history = ConversationHistory() diff --git a/xragents/network.py b/xragents/network.py new file mode 100644 index 0000000..ff71471 --- /dev/null +++ b/xragents/network.py @@ -0,0 +1,24 @@ +import os +import socket + +def ping(host): + response = os.system("ping -n 1 " + host) # Windows-style count flag; a portable variant is sketched after this diff + if response == 0: + return True + else: + return False + +def send_message(message, host, port): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.connect((host, port)) + s.sendall(message.encode("utf-8")) + print("Message sent successfully.") + +if __name__ 
== "__main__": + host = "172.20.128.1" + port = 12345 + + if ping(host): + send_message("Ping test successful!", host, port) + else: + print(f"{host} is not reachable.") diff --git a/xragents/nlp.py b/xragents/nlp.py index fe770aa..de1a4a0 100644 --- a/xragents/nlp.py +++ b/xragents/nlp.py @@ -5,20 +5,22 @@ from dotenv import load_dotenv import time from log_calls import log_calls -load_dotenv() +load_dotenv(override=True) +print(os.getenv("OPENAI_API_KEY")) # Add this line for debugging + openai.api_key = os.getenv("OPENAI_API_KEY") -import openai.error +# import openai.error def get_completion(prompt): """Send in a prompt and get a completion.""" with open("prompt-from-nlp.py.{int(time.time())}.txt", "w") as f: f.write(prompt) return openai.Completion.create( - engine = "davinci", - temperature = 0.8, - max_tokens = 150, + engine = "davinci-002", + temperature = 0.85, + max_tokens = 250, prompt = prompt, - frequency_penalty = 1.5, + frequency_penalty = 2, stop = ["\n", "###", "You:"] ).choices[0].text.rstrip() diff --git a/xragents/scene.py b/xragents/scene.py index 50565a4..89f28e9 100644 --- a/xragents/scene.py +++ b/xragents/scene.py @@ -1,21 +1,28 @@ from __future__ import annotations -from dataclasses import dataclass +import sys import time +import os import enum import logging -import sys - +from dataclasses import dataclass from contextlib import contextmanager -import os from typing import Optional, Any # This is support for type hints from log_calls import log_calls # For logging errors and stuff from .types import Character from . import nlp, audio, anim +MAX_PROMPT_LENGTH = 2048 +PROMPT_LENGTH_THRESHOLD = MAX_PROMPT_LENGTH - 800 +PROMPT_LENGTH_FACTOR = 4 + @dataclass class Scene: + """ + The Scene class is a dataclass that holds information about a scene, + including the characters in it, and the history of the scene. 
+ """ id: int name: str description: str # conversation description @@ -23,19 +30,31 @@ class Scene: text_only: bool history: str = "" + def __init__(self, id: int, name: str, description: str, characters: list[Character], text_only: bool): + self.id = id + self.name = name + self.description = description + self.characters = characters + self.text_only = text_only + self.history = "" + def prompt_for_gpt3(self) -> str: """Return the entire prompt to GPT3.""" char_descs = '\n'.join(c.desc for c in self.characters) return f"{char_descs}\n{self.history}" - def animate(self, character, charLine: str): + def animate(self, character, charLine: str, wavPath: Optional[str] = None): """Used to animate a specific character based on the text input onto a specific animation node's audio stream listener.""" # Generate response updatedHistory = self.history+f"\n{character.name}:{charLine}\n" responseEmotion = nlp.get_emotion(charLine) - # Generate wav, selecting wav file - wavPath = audio.generate_wav(charLine, responseEmotion, lang="en-US", outputPath=f"/scripts/ai/ai_{self.name}") + + + # Only generate a wavPath unless one is not provided + if wavPath is None: + # Generate wav, selecting wav file + wavPath = audio.generate_wav(charLine, responseEmotion, lang="en-US", outputPath=f"/scripts/ai/ai_{self.name}") # Execute animation anim.animate(wavPath, character.primitivePath) @@ -54,6 +73,14 @@ def user_provided_input(self, said_what): """Add the user's input (as a ListenRecord) to the history.""" self.report_histfrag(f"You: {said_what}") + def calculate_compression_ratio(self, prompt, prevlen): + """Calculate the compression ratio of the prompt.""" + lp = len(prompt) + compression_ratio = 0 + if lp != 0: + compression_ratio = prevlen / lp + return compression_ratio + def make_speak(self, character, primitivePath=None) -> str: """Speak, from a character's perspective.""" char_descs = '\n'.join(c.desc for c in self.characters) # Get all character descriptions @@ -87,6 +114,16 @@ def make_speak(self, character, primitivePath=None) -> str: anim.animate(wavPath, primitivePath) # Execute animation # audio.cleanup(wavPath, outputPath) # Erases after speaking + # azure voices test list + azureVoices = ["en-US-TonyNeural", + "en-US-AriaNeural", + "en-US-JennyNeural", + "en-US-GuyNeural",] + + # FORCE ANIMATION + wavPath = audio.generate_wav(textResponse, azureVoices[2]) # Generate wav for animation + anim.animate(wavPath, primitivePath) # Execute animation + return textResponse def save_history(self, outputDir="recording/script_output/"): diff --git a/xragents/simulator.py b/xragents/simulator.py index 7577977..d565386 100644 --- a/xragents/simulator.py +++ b/xragents/simulator.py @@ -16,7 +16,6 @@ class InputModality: def get_line(): pass - def personPlusAi(chr: Character): history = [] """This is a basic conversation between you and an AI. Choose your session description and what characters you want.""" @@ -61,6 +60,61 @@ def personPlusAi(chr: Character): audio.cleanup("recording/ai/", "recording/user/") # delete the temporary files return history + +from xragents.database import conversation_history +from flask import request + +def personPlusAiWeb(chr: Character): + # Get the entire conversation history so far as a single string + history_string = conversation_history.get_history_string() + """This is a basic conversation between you and an AI. 
The conversation is initiated by the frontend.""" + text_only = True # Since we're getting text from the frontend + with scene.make_scene(id=utils.next_session(), + name="Contemplations on Entities", + description=f"The following is an enlightening conversation between you and {chr.name} about the nature of artificial and biological entities, on the substance of souls, individuality, agency, and connection.", + characters=[chr], + text_only=text_only + ) as sess: + + # Create directories to temporarily store audio files + utils.create_audio_directories() + + # Convo loop starts here + shouldntExit = True + logging.info("This is an example info log!") + print(f"You are now talking with {chr.name}!") + print(f"Conversation description: {sess.description}") + print(f"{chr.name}: {chr.desc} ") + + # Here we're getting the text from the frontend client instead of listening for audio + user_text = request.json['text'] + + # Add the user's message to the conversation history + # conversation_history.add_message(f"You: {user_text}") + + # Concatenate the conversation history and the user's message to form the prompt + prompt = f"{history_string}\nYou: {user_text}" + + latest_record = audio.ListenRecord(io.BytesIO(), pathlib.Path("dummy_file.wav"), prompt) # Text based user input + + if(latest_record.spoken_content == "quit" or latest_record.spoken_content is None): # Trigger for ending convo, will then concatenate + return + + latest_record.file_handle.close() + sess.user_provided_input(latest_record) + response = sess.make_speak(chr, chr.primitivePath) + sess.report_histfrag(f"{chr.name}: {response}") # append to history + # history.append(setting.DialogHistory(response)) + conversation_history.add_message(f"You: {user_text}") # user response + conversation_history.add_message(f"{chr.name}: {response}") # ai response + + # Save the audio files to the output directory + if not sess.text_only: + audio.concat_audio_single_directory("recording/ai/", "recording/user/") # the finished audio file is saved + audio.cleanup("recording/ai/", "recording/user/") # delete the temporary files + + return + # def twoAiPlusPerson(chr1: Character, chr2: Character): # history = [] @@ -107,12 +161,21 @@ def personPlusAi(chr: Character): def twoAiPlusPerson(chr1: Character, chr2: Character): history = [] """This is a basic conversation between you and an AI. 
Choose your session description and what characters you want.""" + # with scene.make_scene(id=utils.next_session(), + # name="Contemplations on Entities", + # description=f"The following is an entertaining convo between {chr1.name} and {chr2.name} about the nature of artificial and biological entities, on the substance of souls, individuality, agency, and connection.", + # characters=[chr1, chr2], + # text_only=True, + # ) as sess: + with scene.make_scene(id=utils.next_session(), - name="Contemplations on Entities", - description=f"The following is an entertaining convo between {chr1.name} and {chr2.name} about the nature of artificial and biological entities, on the substance of souls, individuality, agency, and connection.", + name="Horrible debate between two strange characters.", + description=f"The following is a spirited intellectual conversation between {chr1.name} and {chr2.name} about artificial intelligence, the nature of the cosmos, Homo Sapiens, and life itself.", characters=[chr1, chr2], text_only=True, ) as sess: + + # Create directories utils.create_directory("recording/output/", False) # Output should not be cleared utils.create_directory("recording/ai/") # Clears temporary files there
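A note on the tail of main.py: app.run() blocks, so the menu.show() call placed after the __main__ guard is never reached when the script is run directly. A minimal sketch of one way to serve both, assuming the app and menu objects from the patch; running the API in a daemon thread is a suggestion, not part of the patch:

import threading

if __name__ == '__main__':
    # Serve the Flask API in the background; Flask's reloader only works in
    # the main thread, so it must be disabled here.
    api_thread = threading.Thread(
        target=lambda: app.run(host='0.0.0.0', port=5000, use_reloader=False),
        daemon=True,
    )
    api_thread.start()
    menu.show()  # the console menu keeps the main thread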
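For reference, a client-side sketch of the /api/converse route added in main.py. It assumes a server running locally on port 5000 and that personPlusAiWeb() reads request.json['text'] as shown in simulator.py; the requests library is an assumption, not a project dependency:

import requests

# POST one user line; the route replies with the character's latest message.
resp = requests.post(
    "http://localhost:5000/api/converse",
    json={"text": "Hello! What are you?"},
)
print(resp.json()["text"])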
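One pre-existing bug visible in the nlp.py hunk but untouched by it: get_completion() opens "prompt-from-nlp.py.{int(time.time())}.txt" without an f prefix, so the placeholder is never evaluated and every call overwrites the same literally named file. A minimal sketch of the intended behavior (log_prompt is a hypothetical helper, not part of the patch):

import time

def log_prompt(prompt: str) -> str:
    """Write the prompt to a uniquely named file and return its path."""
    path = f"prompt-from-nlp.py.{int(time.time())}.txt"  # note the f prefix
    with open(path, "w") as f:
        f.write(prompt)
    return path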
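The ping() helper in network.py shells out with the Windows-only -n count flag and concatenates host into the command string unquoted. A portable sketch using subprocess, assuming a standard ping binary on PATH:

import platform
import subprocess

def ping(host: str) -> bool:
    """Return True if a single ping to host succeeds."""
    count_flag = "-n" if platform.system() == "Windows" else "-c"
    result = subprocess.run(
        ["ping", count_flag, "1", host],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    return result.returncode == 0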