main.py
import os
import sys
import json
from flask import Flask, request, render_template
from pypdf import PdfReader
import google.generativeai as genai
from dotenv import load_dotenv
from google.cloud import aiplatform
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
# Load environment variables from .env file
load_dotenv()
# Configure the UPLOAD_PATH for saving uploaded files
UPLOAD_PATH = r"__DATA__"
app = Flask(__name__)
# Configure the path for custom modules
sys.path.insert(0, os.path.abspath(os.getcwd()))
# Configuring credentials
api_key = os.environ.get('GEMINI_API_KEY')
if not api_key:
    raise ValueError("API key is missing. Please set 'GEMINI_API_KEY' in your .env file.")
google_credentials = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS')
if google_credentials:
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = google_credentials
# Initialize the Vertex AI platform
try:
    aiplatform.init(project='central-segment-447015-f3', location='us-central1')  # Your Google Cloud project details
    print("Google Cloud AI Platform initialized successfully!")
except Exception as e:
    raise RuntimeError(f"Failed to initialize Google Cloud AI platform: {e}")
# Configure Gemini API
try:
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel('gemini-1.5-flash')
except Exception as e:
    raise RuntimeError(f"Failed to configure Gemini API: {e}")
# Load the knowledge base CSV containing courses and certifications
knowledge_base_df = pd.read_csv('knowledge_base.csv', on_bad_lines='skip')
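# The code below assumes knowledge_base.csv has at least these columns
# (inferred from how the DataFrame is used in get_relevant_courses):
#   Title, Skills, Provider, Type, Duration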
# Function to extract information from the resume using Gemini
def ats_extractor_with_rag(resume_data):
    # Extract relevant keywords from the resume (e.g., skills, job roles)
    extracted_keywords = extract_keywords(resume_data)
    # Retrieve relevant courses and certifications based on the extracted keywords
    relevant_courses = get_relevant_courses(extracted_keywords)
    # Create the combined prompt from the resume data and course information
    combined_prompt = f"""
    Given the following resume data and relevant course/certification recommendations:

    Resume Data:
    {resume_data}

    Relevant Recommendations (Courses, Certifications, etc.):
    {relevant_courses}

    Based on the above, generate recommendations for career improvement and certifications.
    """
    # Generate the response from the Gemini API
    response = model.generate_content(combined_prompt)
    return response.text.strip()
# Function to extract keywords (skills, job roles, etc.) from resume
def extract_keywords(resume_data):
    # For simplicity, this uses a basic whitespace split. It is a placeholder;
    # a real pipeline should use a more robust extractor (see the sketch below).
    keywords = resume_data.split()
    return keywords
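# A slightly more robust alternative (a sketch, not wired into the app; the name
# extract_keywords_v2 is illustrative). It lowercases the text, tokenises with a
# regex, and drops English stop words using scikit-learn's built-in list.
import re
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS

def extract_keywords_v2(resume_data):
    # Keep tokens that start with a letter; allowing +, #, and . lets tokens
    # like "c++", "c#", and "node.js" survive intact
    tokens = re.findall(r"[a-z][a-z0-9+#.]*", resume_data.lower())
    return [t for t in tokens if t not in ENGLISH_STOP_WORDS and len(t) > 1]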
# Function to retrieve relevant courses based on extracted keywords
def get_relevant_courses(extracted_keywords):
    vectorizer = TfidfVectorizer(stop_words='english')
    # Combine the relevant course fields from the knowledge base into one text
    # per course; fillna('') guards against missing cells in the CSV
    course_descriptions = knowledge_base_df['Title'].fillna('') + " " + knowledge_base_df['Skills'].fillna('')
    # Fit the vectorizer on the course descriptions
    vectorizer.fit(course_descriptions)
    # Transform the extracted keywords into the same vector space
    resume_keywords = [" ".join(extracted_keywords)]  # Join the keywords into a single string
    resume_vec = vectorizer.transform(resume_keywords)
    # TfidfVectorizer L2-normalises rows by default, so the sparse dot product is
    # already the cosine similarity. Use the @ operator rather than np.dot, which
    # does not handle scipy sparse matrices correctly.
    course_vecs = vectorizer.transform(course_descriptions)
    cosine_similarities = (resume_vec @ course_vecs.T).toarray().flatten()
    # Sort courses by similarity and keep the top 5
    top_course_indices = cosine_similarities.argsort()[-5:][::-1]
    # Retrieve the top matching courses
    relevant_courses = knowledge_base_df.iloc[top_course_indices][['Title', 'Provider', 'Type', 'Duration']]
    return relevant_courses.to_dict(orient='records')
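# Quick standalone check (illustrative; assumes knowledge_base.csv is present
# and contains matching rows):
#   >>> get_relevant_courses(["python", "sql", "machine", "learning"])
#   [{'Title': ..., 'Provider': ..., 'Type': ..., 'Duration': ...}, ...]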
# Function to read the PDF and extract text
def _read_file_from_path(path):
    reader = PdfReader(path)
    data = ""
    for page in reader.pages:
        # extract_text() can return None for pages with no extractable text
        data += page.extract_text() or ""
    return data
@app.route('/')
def index():
    return render_template('index.html')
@app.route("/process", methods=["POST"])
def ats():
if 'pdf_doc' not in request.files:
return render_template('index.html', error="No PDF file uploaded. Please upload a valid PDF.")
doc = request.files['pdf_doc']
if doc.filename == '':
return render_template('index.html', error="No file selected. Please select a PDF file to upload.")
# Save the uploaded PDF file
doc.save(os.path.join(UPLOAD_PATH, "file.pdf"))
doc_path = os.path.join(UPLOAD_PATH, "file.pdf")
# Read the file and extract information
resume_data = _read_file_from_path(doc_path)
# # Extract resume details and get course recommendations
response = ats_extractor_with_rag(resume_data)
print("Processed Resume Data:")
print(resume_data) # Print the extracted resume data
print("Generated Course and Certification Recommendations:")
print(response) # Print the generated recommendations
return render_template('index.html', data=json.dumps({"Resume Data": resume_data, "AI-based Recommendations": response}))
if __name__ == "__main__":
app.run(port=8000, debug=True)
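# Example request once the server is running (assumes a resume.pdf in the
# current directory):
#   curl -F "pdf_doc=@resume.pdf" http://localhost:8000/process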