Skip to content

Commit

Permalink
Merge pull request #4 from danguetta/open_ai
Browse files Browse the repository at this point in the history
Open ai features
  • Loading branch information
danguetta authored May 19, 2024
2 parents 91ca21d + 20d130b commit 8981346
Show file tree
Hide file tree
Showing 8 changed files with 245 additions and 241 deletions.
2 changes: 0 additions & 2 deletions .github/mac_installer_template.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@ EOF
echo {{version_placeholder}} > "$INSTALL_DIR"/data/version
printf "${YELLOW}Compiling packages${NC}\n"
"$INSTALL_DIR"/bin/python -c "python_command_placeholder"
printf "${YELLOW}Downloading Word2Vec info${NC}\n"
curl -L -o "$INSTALL_DIR"/data/w2v_small.bin https://github.com/danguetta/xlkitlearn/releases/latest/download/w2v_small.bin
printf "${YELLOW}Downloading add-in Excel${NC}\n"
curl -L -o ~/Desktop/XLKitLearn.xltm "https://github.com/danguetta/XLKitLearn/releases/latest/download/XLKitLearn.xltm"
printf "${GREEN}Successfully installed XLKitLearn!${NC}\n"
Expand Down
55 changes: 5 additions & 50 deletions .github/workflows/prepare_release.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
req_mac = '; '.join(['import ' + i.split('==')[0] for i in req_file if 'Windows' not in i])
'''

req_run_string = "import xlwings; import pandas; import numpy; import scipy; import itertools; import time; import warnings; import keyword; import os; import signal; import sys; import patsy; import sklearn.feature_extraction.text; import nltk; warnings.filterwarnings('ignore', category = DeprecationWarning); from sklearn.linear_model import LinearRegression, LogisticRegression, Lasso; from sklearn.neighbors import KNeighborsRegressor, KNeighborsClassifier; from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor ;from sklearn.decomposition import LatentDirichletAllocation; from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor; from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor; import statsmodels.api as sm; from sklearn import model_selection as sk_ms; from sklearn.metrics import r2_score, roc_auc_score, roc_curve, make_scorer; import sklearn.inspection as sk_i; import matplotlib as mpl; import matplotlib.pyplot as plt; import seaborn as sns; import json; from datetime import datetime, timedelta; import hashlib; import requests"
req_run_string = "import xlwings; import pandas; import numpy; import scipy; import itertools; import time; import warnings; import keyword; import os; import signal; import sys; import patsy; import sklearn.feature_extraction.text; import nltk; warnings.filterwarnings('ignore', category = DeprecationWarning); from sklearn.linear_model import LinearRegression, LogisticRegression, Lasso; from sklearn.neighbors import KNeighborsRegressor, KNeighborsClassifier; from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor ;from sklearn.decomposition import LatentDirichletAllocation; from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor; from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor; import statsmodels.api as sm; from sklearn import model_selection as sk_ms; from sklearn.metrics import r2_score, roc_auc_score, roc_curve, make_scorer; import sklearn.inspection as sk_i; import matplotlib as mpl; import matplotlib.pyplot as plt; import seaborn as sns; import json; from datetime import datetime, timedelta; import hashlib; import requests; import tiktoken; import openai"

# ----------------------------
# - Prep Windows Installer -
Expand All @@ -91,61 +91,16 @@
f.write(iss_file.replace('version_placeholder', version)
.replace('python_command_placeholder', req_run_string))

# -------------------------------------
# -  Create the mini Word2Vec file    -
# -------------------------------------

# Create a reduced version of Word2Vec for XLKitLearn.
#
# Steps:
#   1. Load the 'word2vec-google-news-300' model through gensim's downloader.
#   2. Fold every vocabulary entry to a lower-case, accent-free key.
#   3. Merge entries sharing a key into one frequency-weighted mean vector.
#   4. Keep the 100,000 keys with the highest frequency and pickle the
#      resulting {key: vector} dict to <pythonLocation>/data/w2v_small.bin.
#
# Relies on `os` and `env` being defined earlier in this script.

import contextlib
import pickle
import unicodedata

import gensim.downloader as downloader
import pandas as pd


def _strip_accents(text):
    """Return *text* without combining marks (after NFD decomposition)."""
    return ''.join(ch for ch in unicodedata.normalize('NFD', text)
                   if not unicodedata.combining(ch))


# Get the model. stdout is redirected to os.devnull while loading; the
# context managers restore stdout and close the devnull handle even if the
# load raises, so a failed download cannot leave the script with a dead stdout.
with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
    w2v = downloader.load('word2vec-google-news-300')

# Convert it to a Pandas DataFrame: one row per vocabulary entry
v_len = len(w2v.index_to_key)
df = pd.DataFrame({
    'w_id': list(range(v_len)),
    'word': list(w2v.index_to_key),
    'freq': [w2v.get_vecattr(i, 'count') for i in range(v_len)],
})

# Create a lower case, non-accented version of the word
df['word_lower'] = df.word.str.lower().apply(_strip_accents)

# Group the original entries under their normalized key. NOTE: despite its
# name, `av_freq` holds the *maximum* frequency within each group; the name
# is kept in case later code refers to the column.
df_ag = df.groupby('word_lower').agg(ids=('w_id', list),
                                     freqs=('freq', list),
                                     av_freq=('freq', 'max')).reset_index()
df_ag = df_ag.sort_values('av_freq', ascending=False)

# Only keep words made up entirely of ASCII letters and digits
allowed_chars = frozenset('abcdefghijklmnopqrstuvwxyz0123456789')
df_ag = df_ag[df_ag.word_lower.apply(lambda x: set(x) <= allowed_chars)]

# Only keep the top entries
df_ag_small = df_ag.head(100000)

# Output: frequency-weighted mean of the vectors behind each normalized key
out = {}
for word, ids, freqs in zip(df_ag_small.word_lower,
                            df_ag_small.ids,
                            df_ag_small.freqs):
    out[word] = sum([w2v[ind]*freq for ind, freq in zip(ids, freqs)])/sum(freqs)

# Write through a context manager so the file is flushed and closed
with open(os.path.join(env['pythonLocation'], 'data', 'w2v_small.bin'), 'wb') as f:
    pickle.dump(out, f)

# -----------------------
# - Prep Mac installer -
# ------------------------

# Create a file with the version number for the release
with open('version', 'w') as f:
f.write(version)

pickle.dump(out, open('w2v_small.bin', 'wb'))
# TODO: The version written here is manually pinned to 12.03 to avoid extraneous
# warnings; remove this override and restore f.write(version) after the Summer 2024 class.
f.write('12.03')
#f.write(version)

# Load the template installer
with open(MAC_TEMPLATE, 'r') as f:
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# XLKitLearn
## Version: 12.03
## Version: 13.01
<!-- DO ***NOT*** EDIT ANYTHING ABOVE THIS LINE, INCLUDING THIS COMMENT -->

This repo contains the latest version of [XLKitLearn](https://www.xlkitlearn.com). Please see the website for authorship, license, installation, and usage information - this repo provides information for those interested in seeing the add-in's code and/or contributing to it.
Expand Down
Loading

0 comments on commit 8981346

Please sign in to comment.