Skip to content

Commit

Permalink
Improvements (#33)
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexHayton authored Jan 28, 2024
1 parent 54b0cb8 commit 974f5bf
Show file tree
Hide file tree
Showing 7 changed files with 12 additions and 10 deletions.
1 change: 1 addition & 0 deletions .env.template
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ FAST_LLM_MODEL=./models/your_fast_model.bin
FAST_TOKEN_LIMIT=1500
SMART_TOKEN_LIMIT=2000
EMBED_DIM=5120 # if you don't know the value, check the "llama_model_load_internal: n_embd" line in your terminal output
GPU_LAYERS=40 # number of GPU layers. If you get a CUDA out of memory error, try reducing this number

################################################################################
### MEMORY
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ logs
*.log
*.mp3

models/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
7 changes: 4 additions & 3 deletions scripts/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from execute_code import execute_python_file
from json_parser import fix_and_parse_json
from image_gen import generate_image
from duckduckgo_search import ddg
from duckduckgo_search import DDGS
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

Expand Down Expand Up @@ -125,8 +125,9 @@ def get_datetime():
def google_search(query, num_results=8):
    """Return web search results (via DuckDuckGo) as a JSON string.

    Args:
        query: The search query string.
        num_results: Maximum number of results to collect (default 8).

    Returns:
        A JSON-encoded list of result dicts (ensure_ascii=False, indent=4).
    """
    # A DDGS instance is not callable; its text-search entry point is
    # DDGS.text(). Calling ddgs(query, ...) raises TypeError at runtime.
    with DDGS() as ddgs:
        search_results = list(ddgs.text(query, max_results=num_results))

    return json.dumps(search_results, ensure_ascii=False, indent=4)

Expand Down
2 changes: 2 additions & 0 deletions scripts/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ def __init__(self):
self.fast_token_limit = int(os.getenv("FAST_TOKEN_LIMIT", 1500))
self.smart_token_limit = int(os.getenv("SMART_TOKEN_LIMIT", 2000))

self.gpu_layers = int(os.getenv("GPU_LAYERS"))

self.elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY")

self.use_mac_os_tts = False
Expand Down
2 changes: 1 addition & 1 deletion scripts/data/prompt.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ PERFORMANCE EVALUATION:
3. Reflect on past decisions and strategies to refine your approach.
4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.

Only respond in JSON format as described below and nothing else!
Only respond in JSON format as described below (changing the arguments) and nothing else!

{
"thoughts":
Expand Down
2 changes: 1 addition & 1 deletion scripts/llm_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

cfg = Config()
# Grammar file constrains llama.cpp generation to syntactically valid JSON.
grammar = LlamaGrammar.from_file("./grammars/json.gbnf")
# n_gpu_layers now comes from config (GPU_LAYERS env var) rather than the
# previously hard-coded 40; the superseded duplicate assignment is removed.
llm = Llama(model_path=cfg.smart_llm_model, n_ctx=2048, embedding=True, n_gpu_layers=cfg.gpu_layers)



Expand Down
6 changes: 1 addition & 5 deletions scripts/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def print_assistant_thoughts(assistant_reply):
global cfg
try:
# Parse and print Assistant response
print("Assistent Reply", assistant_reply)
print("Assistant Reply", assistant_reply)
assistant_reply_json = fix_and_parse_json(assistant_reply)

# Check if assistant_reply_json is a string and attempt to parse it into a JSON object
Expand Down Expand Up @@ -299,10 +299,6 @@ def parse_arguments():
print_to_console("Fast LLM: ", Fore.GREEN, "ENABLED")
cfg.set_smart_llm_model(cfg.fast_llm_model)

if args.debug:
print_to_console("Debug Mode: ", Fore.GREEN, "ENABLED")
cfg.set_debug_mode(True)


# TODO: fill in llm values here
# check_openai_api_key()
Expand Down

0 comments on commit 974f5bf

Please sign in to comment.