Skip to content

Commit

Permalink
Staging (#107)
Browse files Browse the repository at this point in the history
* Merged kes-python-flask-app and file-handling branches
* Add Kaggle API, File parsing, Calculations, and GeneratePDF

Deployed on Heroku from Elizabeth's fork to get the 'analyze the dataset' button to run and return a report in analyse.html. Now, we are merging into Kes's branch to get the analyse.html page working on the heroku app that is meant to be for the project.

* Update Installation instructions

* Delete data.csv, Add report folder and kaggle info

The app needs the .kaggle/kaggle.json file in order to access the Kaggle API.

Co-authored-by: Kes <[email protected]>
Co-authored-by: sakshigupta265 <[email protected]>
Co-authored-by: kes cardoso <[email protected]>
Co-authored-by: wyang0216 <[email protected]>
Co-authored-by: sakshigupta265 <[email protected]>
  • Loading branch information
6 people authored Apr 13, 2021
1 parent c67a362 commit 800c3e6
Show file tree
Hide file tree
Showing 24 changed files with 841 additions and 163 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
__pycache__
venv
env.py
.kaggle
.vscode
.DS_Store
1 change: 1 addition & 0 deletions .kaggle/kaggle.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"username":"elizabethcrouther","key":"7a5c2300510b71d5ce544950879b0817"}
10 changes: 10 additions & 0 deletions ROI.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import cv2

def region_of_interest(faces, img_copy, offset=0):
    """Crop the bounding box of the last detected face out of an image.

    Args:
        faces: Iterable of ``(x, y, w, h)`` bounding boxes.
        img_copy: Image array indexed as ``[row, column]``.
        offset: Extra pixels added on every side of the box. Defaults to 0,
            which reproduces the previously hard-coded value.

    Returns:
        The slice of ``img_copy`` covered by the last box in ``faces``, or
        ``None`` when ``faces`` is empty (previously this case raised
        ``UnboundLocalError``).
    """
    face_section = None
    for x, y, w, h in faces:
        # Clamp the start indices: a negative start would make the slice
        # wrap around to the far edge of the image instead of stopping at 0.
        top = max(y - offset, 0)
        left = max(x - offset, 0)
        face_section = img_copy[top:y + h + offset, left:x + w + offset]

    return face_section
31 changes: 22 additions & 9 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@

# only importing this function prevents
# the whole .py file from executing on startup
from runTerminalCommands import startCommands

from runTerminalCommands import openFiles, findReadableFiles

if os.path.exists("env.py"):
import env
Expand Down Expand Up @@ -127,20 +128,32 @@ def analyse_data():
if fileString is not None:
split_filename = fileString.split('.com/')
fileString = split_filename[1]
reportMade = startCommands(fileString)
# With open('/Users/mac/IdeaProjects/datasetbucket/report.pdf', 'rb')
# as static_file """
if reportMade:

targetDataPath = os.path.join('https://github.com/eliboss/datasetbucket/raw/main/dataFiles', fileString)

time.sleep(6)
targetReportPath = os.path.join('https://github.com/eliboss/datasetbucket/raw/main/reportdir', 'report.pdf')
# reportMade =
# print('report made: ', reportMade)
#print(targetReportPath)

reportMade, reportName = openFiles(fileString, targetDataPath, targetReportPath)
if reportMade is not None:
print('report made: ', reportMade)
print('report name: ', reportName)
time.sleep(5)
#reportName = reportName+'.pdf'
#reportPath = os.path.join(reportMade, reportName)

try:
return send_file('/Users/mac/IdeaProjects/datasetbucket/report.pdf',
as_attachment=True)
return send_file(reportMade, as_attachment=True)
except:
return render_template("analyse.html",
dataToRender="Unable able to generate report")
dataToRender="Unable to generate report")
else:
return render_template("analyse.html",
dataToRender="Unable able to generate report")
dataToRender="Unable to generate report")

return render_template("analyse.html")


Expand Down
47 changes: 45 additions & 2 deletions calculations.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
from statistics import mean, variance
from sklearn.linear_model import LinearRegression

import matplotlib
import matplotlib.pyplot as plt


# calculate the mean of an array of data
def calcMean(data):
if data is None: return None
Expand Down Expand Up @@ -32,7 +36,7 @@ def calcLinearReg(npArray):

# calculates how many unique values are in an array
# TODO: make it work for a dict too
def calcUniquieValues(data):
def calcUniqueValues(data):
if data is None:
return
array = np.array(data)
Expand All @@ -42,4 +46,43 @@ def calcUniquieValues(data):
if d not in temparray:
temparray.append(d)
count += 1
return count
return count

# renders a label for the breakdown sentence
def _formatBreakDownLabel(label):
    """Return ``label`` as text, showing whole-number floats without ``.0``."""
    if isinstance(label, float) and label.is_integer():
        return str(int(label))
    return str(label)


# calculates the number of samples for each value
def calcBreakDown(data):
    """Describe in one sentence how many samples carry each distinct value.

    Args:
        data: Array-like of labels. Labels may be numeric or strings.

    Returns:
        A sentence such as
        ``"You have 2 samples labelled as 0 and 3 samples labelled as 1."``,
        ``"You have no samples."`` for empty input, or ``None`` when ``data``
        is ``None`` (matching the other helpers in this module).
    """
    if data is None:
        return None

    unique, counts = np.unique(data, return_counts=True)
    if unique.size == 0:
        return "You have no samples."

    # Pair labels with counts directly instead of stacking both into one
    # integer array: that cast raised on string labels and truncated
    # fractional ones.
    parts = [
        str(count) + " samples labelled as " + _formatBreakDownLabel(label)
        for label, count in zip(unique.tolist(), counts.tolist())
    ]
    return "You have " + " and ".join(parts) + "."

# saves a histogram for a feature as a PNG file
def calcHistogram(data, category):
    """Plot a histogram of ``data`` and save it as ``"<category> Histogram.png"``.

    The file is written relative to the current working directory. The number
    of bins equals the number of unique values, capped at 10.

    Args:
        data: Array-like of values to plot.
        category: Feature name, used for the title, the x-axis label and the
            file name.

    Returns:
        The name of the saved PNG file, or ``None`` when ``data`` is ``None``
        (matching the other helpers in this module).
    """
    if data is None:
        return None

    title = category + " Histogram"
    file_name = title + '.png'

    # At most 10 bins, and never fewer than 1: empty data has 0 unique values
    # and plt.hist rejects bins=0.
    num_unique_val = calcUniqueValues(data)
    num_bin = max(1, min(num_unique_val, 10))

    # Non-interactive backend, so no display is needed to render the figure.
    matplotlib.use('agg')
    try:
        plt.hist(data, bins=num_bin)
        plt.title(title)
        plt.xlabel(category)
        plt.ylabel("Count")
        plt.savefig(file_name)
    finally:
        # Always release the current figure, even if plotting or saving
        # failed, so the next call starts from a clean one.
        plt.close()

    return file_name

5 changes: 5 additions & 0 deletions dataFiles/do_not_delete.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
f_name,l_name
Sakshi,Gupta
Elizabeth,Crouther
Kes,Cardoso
William,Yang
58 changes: 58 additions & 0 deletions detectFace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import cv2

def draw_found_faces(detected, image, color: tuple):
    """Outline every detected box on ``image`` in place.

    Args:
        detected: Iterable of ``(x, y, width, height)`` boxes.
        image: Image passed to ``cv2.rectangle``; it is drawn on directly.
        color: Colour tuple handed to ``cv2.rectangle``.
    """
    for left, top, box_width, box_height in detected:
        top_left = (left, top)
        bottom_right = (left + box_width, top + box_height)
        cv2.rectangle(image, top_left, bottom_right, color, thickness=2)

def detect_faces(img_path):
    """Detect frontal faces in an image file with a Haar cascade.

    The image is resized to 256x256 before detection, so the returned boxes
    are expressed in the coordinates of the resized image.

    Args:
        img_path: Path of the image file to read.

    Returns:
        A tuple ``(img_copy, front_faces)``: an unannotated copy of the
        resized image, and the result of ``detectMultiScale`` for the frontal
        face cascade.
    """
    # Only the frontal cascade is used. The profile cascade used to be loaded
    # and run here as well, but its result was never read.
    faceCascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_alt.xml")

    # NOTE(review): cv2.imread returns None rather than raising when the file
    # cannot be read, in which case the resize below fails with an OpenCV error.
    img = cv2.imread(img_path)

    # Reduce the image to a standard 256x256 size and keep a clean copy.
    img = cv2.resize(img, (256, 256))
    img_copy = img.copy()

    # Detection runs on the grayscale version of the image.
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    front_faces = faceCascade.detectMultiScale(
        gray_img,
        scaleFactor=1.3,
        minNeighbors=5,
    )

    # Annotates the working image only; the copy returned below stays clean.
    # The annotated image is not returned or displayed by this function.
    draw_found_faces(front_faces, img, (0, 255, 0))  # BGR - green

    return img_copy, front_faces
33 changes: 33 additions & 0 deletions extractDominantColor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import cv2
import getColorInformation
from sklearn.cluster import KMeans
import warnings


def extractDominantColor(image,number_of_colors,hasThresholding=False):
    """Cluster the pixels of ``image`` with KMeans and summarise the clusters.

    Args:
        image: BGR image; it is copied, so the caller's array is untouched.
        number_of_colors: Number of colour clusters requested.
        hasThresholding: When True, one extra cluster is fitted (the original
            quick fix intended to absorb the black pixels) and the flag is
            forwarded to ``getColorInformation``.

    Returns:
        Whatever ``getColorInformation.getColorInformation`` returns for the
        fitted labels and cluster centres.
    """
    # One more cluster when thresholding was applied
    cluster_count = number_of_colors + 1 if hasThresholding == True else number_of_colors

    # Work on an RGB copy, flattened to one row per pixel
    rgb = cv2.cvtColor(image.copy(), cv2.COLOR_BGR2RGB)
    height, width = rgb.shape[0], rgb.shape[1]
    pixels = rgb.reshape(height * width, 3)

    kmeans = KMeans(n_clusters=cluster_count, random_state=0)

    # Warnings raised while fitting are silenced
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        kmeans.fit(pixels)

    return getColorInformation.getColorInformation(
        kmeans.labels_,
        kmeans.cluster_centers_,
        hasThresholding,
    )
57 changes: 57 additions & 0 deletions extractSkin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import cv2
import numpy as np

def extractSkin(image):
    """Return a copy of ``image`` in which only the thresholded pixels remain.

    Args:
        image: BGR image; it is copied, so the caller's array is untouched.

    Returns:
        A BGR image in which pixels outside the HSV threshold range are
        masked out.
    """
    # HSV thresholds that define the accepted colour range
    hsv_low = np.array([0, 48, 80], dtype=np.uint8)
    hsv_high = np.array([20, 255, 255], dtype=np.uint8)

    # Threshold in HSV space on a copy of the input
    hsv = cv2.cvtColor(image.copy(), cv2.COLOR_BGR2HSV)

    # Single-channel mask of in-range pixels, smoothed with a 3x3 Gaussian
    in_range = cv2.inRange(hsv, hsv_low, hsv_high)
    mask = cv2.GaussianBlur(in_range, (3, 3), 0)

    # Keep only the masked pixels, then convert back to BGR
    masked_hsv = cv2.bitwise_and(hsv, hsv, mask=mask)
    return cv2.cvtColor(masked_hsv, cv2.COLOR_HSV2BGR)

def removeBlack(estimator_labels, estimator_cluster):
    """Drop the first cluster whose colour truncates to pure black.

    Clusters are visited from most to least frequent label; the first one
    whose centre converts to ``[0, 0, 0]`` is removed and the search stops.

    Args:
        estimator_labels: Iterable of cluster labels, one per sample.
        estimator_cluster: Array of cluster centres, indexed by label.

    Returns:
        A tuple ``(occurance_counter, estimator_cluster, hasBlack)``: the
        label counts without the black cluster, the cluster centres with the
        black row deleted, and whether a black cluster was found.
    """
    # Imported here because this module does not import Counter at the top,
    # so the function previously failed with NameError when called.
    from collections import Counter

    hasBlack = False

    # Total number of occurrences of each label
    occurance_counter = Counter(estimator_labels)

    # most_common returns a list, so deleting from the counter inside the
    # loop is safe.
    for label, _ in occurance_counter.most_common(len(estimator_cluster)):
        # Truncate each channel of the cluster centre to an int
        color = [int(channel) for channel in estimator_cluster[label].tolist()]

        if color == [0, 0, 0]:
            del occurance_counter[label]
            # NOTE(review): deleting the row shifts the index of every later
            # cluster while the counter keeps the original labels; confirm
            # that callers account for this.
            estimator_cluster = np.delete(estimator_cluster, label, 0)
            hasBlack = True
            break

    return (occurance_counter, estimator_cluster, hasBlack)
40 changes: 40 additions & 0 deletions filePath.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import glob
import os
import cv2
import main
import sys

def getPath(folder):
    """Collect the image files (.jpeg, .jpg, .png) stored under ``dataFiles``.

    The search is relative to the current working directory. On Windows,
    every sub-directory of ``dataFiles`` is searched recursively and
    ``folder`` is not used. On every other platform only the files directly
    inside ``dataFiles/<folder>`` are matched.

    Args:
        folder: Name of the sub-directory of ``dataFiles`` to search.

    Returns:
        A tuple ``(files, count)``: the matching paths (``.jpeg`` first, then
        ``.jpg``, then ``.png``) and how many there are.
    """
    data_root = os.path.join(os.getcwd(), 'dataFiles')

    if sys.platform.startswith('win32'):
        # NOTE(review): this branch has always ignored `folder` and walked the
        # whole dataFiles tree; kept as-is, but confirm that this is intended.
        search_parts = (data_root, '**')
    else:
        # Default for every non-Windows platform. Previously only darwin and
        # linux were handled and any other platform hit UnboundLocalError.
        search_parts = (data_root, folder)

    files = []
    for extension in ('jpeg', 'jpg', 'png'):
        pattern = os.path.join(*search_parts, '*.' + extension)
        files.extend(glob.glob(pattern, recursive=True))

    return files, len(files)

# For debugging without running the whole app

# my_path = os.getcwd()
# files_jpg = glob.glob(my_path + '\\dataFiles\\**\\*.jpg' , recursive=True)
# files_jpeg = glob.glob(my_path + '\\dataFiles\\**\\*.jpeg' , recursive=True)
# files_png = glob.glob(my_path + '\\dataFiles\\**\\*.png' , recursive=True)

# files = files_jpeg + files_jpg + files_png
# l = len(files)

# main.readImage(files, l)
Loading

0 comments on commit 800c3e6

Please sign in to comment.