Merge pull request #2531 from taneemishere/Taneem_Jan
Moves all machine learning algorithms to the machine_learning directory and adds an SVM implementation in Python.
Showing 93 changed files with 167 additions and 119 deletions.
36 files renamed without changes.
@@ -0,0 +1,8 @@
# Support Vector Machine

The Support Vector Machine (SVM) is a supervised machine learning algorithm used for both classification and regression problems. For classification, the data points are separated by drawing an optimal hyperplane. But how is a hyperplane determined to be the optimal one? The algorithm identifies the support vectors, the data points of each category that lie closest to the decision boundary, and the hyperplane that leaves the widest margin between the support vectors of the categories is considered optimal.
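To make "the widest margin" precise, here is the standard hard-margin formulation as textbook background (not part of this commit). For training points $x_i$ with labels $y_i \in \{-1, +1\}$, the optimal hyperplane $w \cdot x + b = 0$ solves

$$\min_{w,\,b} \ \frac{1}{2}\lVert w \rVert^2 \quad \text{subject to} \quad y_i\,(w \cdot x_i + b) \ge 1 \ \text{for all } i.$$

The distance between the two supporting hyperplanes $w \cdot x + b = \pm 1$ is $2/\lVert w \rVert$, so minimizing $\lVert w \rVert$ maximizes the margin, and the points that satisfy the constraint with equality are the support vectors. The soft-margin variant adds slack terms weighted by a parameter C, which is the C passed to sklearn's SVC in the script below.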
40 changes: 40 additions & 0 deletions in machine_learning/Support_Vector_Machine/Python/SVM_with_Sklearn.py
@@ -0,0 +1,40 @@
# some imports
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics

# Load the breast cancer dataset bundled with sklearn
cancer = datasets.load_breast_cancer()

# See the feature and label names of the dataset
print("Features are: ", cancer.feature_names)
print("Labels are: ", cancer.target_names)

# Assign the features to X and the labels to y
X = cancer.data
y = cancer.target

# Split the dataset into 80% training and 20% testing
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# print(x_train, y_train)

# These are the two classes of the label
classes = ['malignant', 'benign']

# Support Vector Classifier of the Support Vector Machine
# C is the regularization parameter that controls how soft the margin is
clf = svm.SVC(kernel="linear", C=2)
clf.fit(x_train, y_train)

# Predict the labels of the test features
y_pred = clf.predict(x_test)

# Compute the accuracy score of the model
acc = metrics.accuracy_score(y_test, y_pred)

print("Accuracy of SVC: ", acc)
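The kernel="linear" and C=2 above are fixed choices; a natural next step is to search over them with cross-validation. A minimal sketch, not part of this commit, assuming the same breast cancer dataset and sklearn's GridSearchCV (the grid values are illustrative):

from sklearn import datasets, svm
from sklearn.model_selection import GridSearchCV, train_test_split

cancer = datasets.load_breast_cancer()
x_train, x_test, y_train, y_test = train_test_split(cancer.data, cancer.target, test_size=0.2)

# Candidate kernels and soft-margin strengths to try (illustrative values)
param_grid = {"kernel": ["linear", "rbf"], "C": [0.5, 1, 2, 10]}

# 5-fold cross-validation over the grid, fit on the training split only
search = GridSearchCV(svm.SVC(), param_grid, cv=5)
search.fit(x_train, y_train)

print("Best parameters:", search.best_params_)
print("Test accuracy:", search.score(x_test, y_test))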
45 files renamed without changes.
22 changes: 11 additions & 11 deletions in ...arning/python/transfer-learning/readme.md → ...arning/python/transfer-learning/readme.md
@@ -1,11 +1,11 @@
# Transfer Learning
In transfer learning, the knowledge of an already trained machine learning model is applied to a different but related problem. For example, if you trained a simple classifier to predict whether an image contains a backpack, you could use the knowledge the model gained during its training to recognize other objects, such as sunglasses. With transfer learning, we basically try to exploit what has been learned in one task to improve generalization in another: we transfer the weights that a network has learned on task A to a new task B.

The general idea is to use knowledge that a model has learned from a task with plenty of labeled training data in a new task where little data is available. Instead of starting the learning process from scratch, you start from patterns that were learned by solving a related task. Transfer learning is mostly used in computer vision and in natural language processing tasks such as sentiment analysis, because of the huge amount of computational power these tasks require.


4 files renamed without changes.
100 changes: 50 additions & 50 deletions in ...is_twitter/Deep Learning/sentiment_cnn.py → ...is_twitter/Deep Learning/sentiment_cnn.py
@@ -1,50 +1,50 @@
import numpy as np
import pandas as pd
from keras.layers import Input, Dense, Bidirectional, Embedding, Dropout, Flatten
from keras.layers import concatenate, SpatialDropout1D, GlobalAveragePooling1D, GlobalMaxPooling1D
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.models import Model
from sklearn.model_selection import train_test_split
from utils import *


maxlen = 150
max_features = 2500


gop = pd.read_csv('Data/gop.csv')
data = gop[['text', 'sentiment']]

# Balance Negative - Positive tweets: keep only the first 2236 Negative rows;
# the remaining Negative rows become NaN and are removed by dropna below
data[data['sentiment'] == 'Negative'] = data[data['sentiment'] == 'Negative'][:2236]
data = data.dropna()

data['sentiment'].value_counts()  # Negative: 8493; Neutral: 3142; Positive: 2236
X, Y = format_data(data, max_features, maxlen)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=42)


# Input shape
inp = Input(shape=(maxlen,))

# Embedding and CNN
x = Embedding(max_features, 150)(inp)
x = SpatialDropout1D(0.25)(x)
x = Conv1D(filters=32, kernel_size=3, padding='same', activation='relu')(x)
x = MaxPooling1D(pool_size=2)(x)
x = Conv1D(filters=16, kernel_size=5, padding='same', activation='relu')(x)
x = MaxPooling1D(pool_size=4)(x)
x = Flatten()(x)

# Output layer
output = Dense(1, activation='sigmoid')(x)

model = Model(inputs=inp, outputs=output)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


model.fit(X_train, Y_train, epochs=5, batch_size=32, verbose=1)

results = model.predict(X_test, batch_size=1, verbose=1)
run_test(results, Y_test)
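A quick way to sanity-check the network above is Keras's built-in summary and evaluation helpers. A small sketch, not part of this commit, assuming the model and test split defined above:

model.summary()  # prints layer output shapes and parameter counts
loss, acc = model.evaluate(X_test, Y_test, batch_size=32, verbose=0)
print('Test accuracy:', acc)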
116 changes: 58 additions & 58 deletions in ..._twitter/Deep Learning/sentiment_utils.py → ..._twitter/Deep Learning/sentiment_utils.py
@@ -1,59 +1,59 @@
import pandas as pd
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences


def format_data(data, max_features, maxlen):
    data = data[data.sentiment != "Neutral"]
    data = data.sample(frac=1).reset_index(drop=True)
    data['text'] = data['text'].apply(lambda x: x.lower())

    Y = to_numerical(data['sentiment'].values)  # 0: Negative; 1: Positive
    X = data['text']

    remove_rt_url(X)

    tokenizer = Tokenizer(num_words=max_features)
    tokenizer.fit_on_texts(list(X))

    X = tokenizer.texts_to_sequences(X)
    X = pad_sequences(X, maxlen=maxlen)

    return X, Y


def to_numerical(d):
    """Converts the categorical df[col] to numerical"""
    _, d = np.unique(d, return_inverse=True)
    return d


def run_test(results, Y_validate):
    pos_correct, neg_correct, total_correct = 0, 0, 0
    _, (neg_count, pos_count) = np.unique(Y_validate, return_counts=True)

    for i, r in enumerate(results):
        # Threshold the sigmoid output at 0.5
        if r > 0.5:
            r = 1
        else:
            r = 0

        if r == Y_validate[i]:
            total_correct += 1
            if r == 0:
                neg_correct += 1
            else:
                pos_correct += 1

    print('Positive Accuracy:', pos_correct/pos_count * 100, '%')
    print('Negative Accuracy:', neg_correct/neg_count * 100, '%')
    print('Total Accuracy:', total_correct/(pos_count + neg_count) * 100, '%')


def remove_rt_url(df):
    # Strip leading "RT " retweet markers and URLs from the text, in place
    url = r'((https?):((//)|(\\\\))+([\w\d:#@%/;$()~_?\+-=\\\.&](#!)?)*)'
    df.replace(regex=True, inplace=True, to_replace=r'^RT ', value=r'')
    df.replace(regex=True, inplace=True, to_replace=url, value=r'')
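For reference, a minimal usage sketch of remove_rt_url, not part of this commit; it assumes the function above is in scope, and the sample tweets are invented for illustration:

import pandas as pd

tweets = pd.Series(['RT check this out https://t.co/abc', 'a plain tweet'])
remove_rt_url(tweets)
print(tweets.tolist())  # leading "RT " and the URL are stripped in place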
3 files renamed without changes.