Merge pull request #4 from danguetta/open_ai

Open ai features
danguetta · May 19, 2024 · 8981346 · 8981346
2 parents 91ca21d + 20d130b
commit 8981346
Show file tree

Hide file tree

Showing 8 changed files with 245 additions and 241 deletions.
diff --git a/.github/mac_installer_template.sh b/.github/mac_installer_template.sh
@@ -38,8 +38,6 @@ EOF
     echo {{version_placeholder}} > "$INSTALL_DIR"/data/version
     printf "${YELLOW}Compiling packages${NC}\n"
     "$INSTALL_DIR"/bin/python -c "python_command_placeholder"
-    printf "${YELLOW}Downloading Word2Vec info${NC}\n"
-    curl -L -o "$INSTALL_DIR"/data/w2v_small.bin https://github.com/danguetta/xlkitlearn/releases/latest/download/w2v_small.bin
     printf "${YELLOW}Downloading add-in Excel${NC}\n"
     curl -L -o ~/Desktop/XLKitLearn.xltm "https://github.com/danguetta/XLKitLearn/releases/latest/download/XLKitLearn.xltm"
     printf "${GREEN}Successfully installed XLKitLearn!${NC}\n"

diff --git a/.github/workflows/prepare_release.py b/.github/workflows/prepare_release.py
@@ -72,7 +72,7 @@
     req_mac = '; '.join(['import ' + i.split('==')[0] for i in req_file if 'Windows' not in i])
     '''
 
-    req_run_string = "import xlwings; import pandas; import numpy; import scipy; import itertools; import time; import warnings; import keyword; import os; import signal; import sys; import patsy; import sklearn.feature_extraction.text; import nltk; warnings.filterwarnings('ignore', category = DeprecationWarning); from sklearn.linear_model import LinearRegression, LogisticRegression, Lasso; from sklearn.neighbors import KNeighborsRegressor, KNeighborsClassifier; from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor ;from sklearn.decomposition import LatentDirichletAllocation; from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor; from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor; import statsmodels.api as sm; from sklearn import model_selection as sk_ms; from sklearn.metrics import r2_score, roc_auc_score, roc_curve, make_scorer; import sklearn.inspection as sk_i; import matplotlib as mpl; import matplotlib.pyplot as plt; import seaborn as sns; import json; from datetime import datetime, timedelta; import hashlib; import requests"
+    req_run_string = "import xlwings; import pandas; import numpy; import scipy; import itertools; import time; import warnings; import keyword; import os; import signal; import sys; import patsy; import sklearn.feature_extraction.text; import nltk; warnings.filterwarnings('ignore', category = DeprecationWarning); from sklearn.linear_model import LinearRegression, LogisticRegression, Lasso; from sklearn.neighbors import KNeighborsRegressor, KNeighborsClassifier; from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor ;from sklearn.decomposition import LatentDirichletAllocation; from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor; from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor; import statsmodels.api as sm; from sklearn import model_selection as sk_ms; from sklearn.metrics import r2_score, roc_auc_score, roc_curve, make_scorer; import sklearn.inspection as sk_i; import matplotlib as mpl; import matplotlib.pyplot as plt; import seaborn as sns; import json; from datetime import datetime, timedelta; import hashlib; import requests; import tiktoken; import openai"
 
     # ----------------------------
     # -  Prep Windows Installer  -
@@ -91,61 +91,16 @@
         f.write(iss_file.replace('version_placeholder', version)
                         .replace('python_command_placeholder', req_run_string))
 
-    # -------------------------------------
-    # -   Create the mini Word2Vec file   -
-    # -------------------------------------
-
-    # Create a reduced version of Word2Vec for XLKit Learn
-
-    import gensim.downloader as downloader
-    import pandas as pd
-    import pickle
-    import unicodedata
-
-    # Get the model
-    original_stdout = sys.stdout
-    sys.stdout = open(os.devnull, 'w')
-
-    w2v = downloader.load('word2vec-google-news-300')
-
-    sys.stdout.close()
-    sys.stdout = original_stdout
-
-    # Convert it to a Pandas DataFrame
-    v_len = len(w2v.index_to_key)
-    df = pd.DataFrame({'w_id' : list(range(v_len))                                  ,
-                    'word' : list(w2v.index_to_key)                              ,
-                    'freq' : [w2v.get_vecattr(i, 'count') for i in range(v_len)]  })
-
-    # Create a lower case, non-accented verison of the word
-    df['word_lower'] = df.word.str.lower().apply(lambda x : ''.join([i for i in unicodedata.normalize('NFD', x) if not unicodedata.combining(i)]))
-    df_ag = df.groupby('word_lower').agg(ids     = ('w_id', list),
-                                        freqs   = ('freq', list),
-                                        av_freq = ('freq', 'max')).reset_index()
-    df_ag = df_ag.sort_values('av_freq', ascending=False)
-
-    # Only keep words
-    df_ag = df_ag[df_ag.word_lower.apply(lambda x : all(i in 'abcdefghijklmnopqrstuvwxyz0123456789' for i in x))]
-
-    # Only keep the top entries
-    df_ag_small = df_ag.head(100000)
-
-    # Output
-    out = {}
-    for i, row in df_ag_small.iterrows():
-        out[row.word_lower] = sum([w2v[ind]*freq for ind, freq in zip(row.ids, row.freqs)])/sum(row.freqs)
-
-    pickle.dump(out, open(os.path.join(env['pythonLocation'], 'data', 'w2v_small.bin'), 'wb'))
-
     # -----------------------
     # -  Prep Mac installer  -
     # ------------------------
 
     # Create a file with the version number for the release
     with open('version', 'w') as f:
-        f.write(version)
-
-    pickle.dump(out, open('w2v_small.bin', 'wb'))
+        # MANUALLY SETTING THIS TO 12.03 TO AVOID EXTRANEOUS WARNINGS; REMOVE AFTER SUMMER 2024 CLASS
+        # TODO
+        f.write('12.03')
+        #f.write(version)
 
     # Load the template installer
     with open(MAC_TEMPLATE, 'r') as f:

diff --git a/README.md b/README.md
@@ -1,5 +1,5 @@
 # XLKitLearn
-## Version: 12.03
+## Version: 13.01
 <!-- DO ***NOT*** EDIT ANYTHING ABOVE THIS LINE, INCLUDING THIS COMMENT -->
 
 This repo contains the latest version of [XLKitLearn](https://www.xlkitlearn.com). Please see the website for authorship, license, installation, and usage information - this repo provides information for those interested in seeing the add-in's code and/or contributing to it.