-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathquery.py
198 lines (172 loc) · 7.4 KB
/
query.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
"""
Run a series of queries on the Neo4j database
"""
import os
import polars as pl
from codetiming import Timer
from dotenv import load_dotenv
from neo4j import GraphDatabase, Session
load_dotenv()
# Config
URI = "bolt://localhost:7687"
NEO4J_USER = os.environ.get("NEO4J_USER")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")
def run_query1(session: Session) -> None:
"Who are the top 3 most-followed persons in the network?"
query = """
MATCH (follower:Person)-[:FOLLOWS]->(person:Person)
RETURN person.personID AS personID, person.name AS name, count(follower) AS numFollowers
ORDER BY numFollowers DESC LIMIT 3
"""
print(f"\nQuery 1:\n {query}")
# with Timer(name="query1", text="Query 1 completed in {:.6f}s"):
response = session.run(query)
result = pl.from_dicts(response.data())
print(f"Top 3 most-followed persons:\n{result}")
return result
def run_query2(session: Session) -> None:
"In which city does the most-followed person in the network live?"
query = """
MATCH (follower:Person) -[:FOLLOWS]-> (person:Person)
WITH person, count(follower) as followers
ORDER BY followers DESC LIMIT 1
MATCH (person) -[:LIVES_IN]-> (city:City)
RETURN person.name AS name, followers AS numFollowers, city.city AS city, city.state AS state, city.country AS country
"""
print(f"\nQuery 2:\n {query}")
response = session.run(query)
result = pl.from_dicts(response.data())
print(f"City in which most-followed person lives:\n{result}")
return result
def run_query3(session: Session, country: str) -> None:
"Which 5 cities in a particular country have the lowest average age in the network?"
query = """
MATCH (p:Person) -[:LIVES_IN]-> (c:City) -[*1..2]-> (co:Country)
WHERE co.country = $country
RETURN c.city AS city, avg(p.age) AS averageAge
ORDER BY averageAge LIMIT 5
"""
print(f"\nQuery 3:\n {query}")
response = session.run(query, country=country)
result = pl.from_dicts(response.data())
print(f"Cities with lowest average age in {country}:\n{result}")
return result
def run_query4(session: Session, age_lower: int, age_upper: int) -> None:
"How many persons between a certain age range are in each country?"
query = """
MATCH (p:Person)-[:LIVES_IN]->(ci:City)-[*1..2]->(country:Country)
WHERE p.age >= $age_lower AND p.age <= $age_upper
RETURN country.country AS countries, count(country) AS personCounts
ORDER BY personCounts DESC LIMIT 3
"""
print(f"\nQuery 4:\n {query}")
response = session.run(query, age_lower=age_lower, age_upper=age_upper)
result = pl.from_dicts(response.data())
print(f"Persons between ages {age_lower}-{age_upper} in each country:\n{result}")
return result
def run_query5(session: Session, gender: str, city: str, country: str, interest: str) -> None:
"How many men in a particular city have an interest in the same thing?"
query = """
MATCH (p:Person)-[:HAS_INTEREST]->(i:Interest)
WHERE tolower(i.interest) = tolower($interest)
AND tolower(p.gender) = tolower($gender)
WITH p, i
MATCH (p)-[:LIVES_IN]->(c:City)
WHERE c.city = $city AND c.country = $country
RETURN count(p) AS numPersons
"""
print(f"\nQuery 5:\n {query}")
response = session.run(query, gender=gender, city=city, country=country, interest=interest)
result = pl.from_dicts(response.data())
print(
f"Number of {gender} users in {city}, {country} who have an interest in {interest}:\n{result}"
)
return result
def run_query6(session: Session, gender: str, interest: str) -> None:
"Which city has the maximum number of people of a particular gender that share a particular interest"
query = """
MATCH (p:Person)-[:HAS_INTEREST]->(i:Interest)
WHERE tolower(i.interest) = tolower($interest)
AND tolower(p.gender) = tolower($gender)
WITH p, i
MATCH (p)-[:LIVES_IN]->(c:City)
RETURN count(p) AS numPersons, c.city AS city, c.country AS country
ORDER BY numPersons DESC LIMIT 5
"""
print(f"\nQuery 6:\n {query}")
response = session.run(query, gender=gender, interest=interest)
result = pl.from_dicts(response.data())
print(f"Cities with the most {gender} users who have an interest in {interest}:\n{result}")
return result
def run_query7(
session: Session, country: str, age_lower: int, age_upper: int, interest: str
) -> None:
"Which U.S. state has the maximum number of persons between a specified age who enjoy a particular interest?"
query = """
MATCH (p:Person)-[:LIVES_IN]->(:City)-[:CITY_IN]->(s:State)
WHERE p.age >= $age_lower AND p.age <= $age_upper AND s.country = $country
WITH p, s
MATCH (p)-[:HAS_INTEREST]->(i:Interest)
WHERE tolower(i.interest) = tolower($interest)
RETURN count(p) AS numPersons, s.state AS state, s.country AS country
ORDER BY numPersons DESC LIMIT 1
"""
print(f"\nQuery 7:\n {query}")
response = session.run(
query, country=country, age_lower=age_lower, age_upper=age_upper, interest=interest
)
result = pl.from_dicts(response.data())
print(
f"""
State in {country} with the most users between ages {age_lower}-{age_upper} who have an interest in {interest}:\n{result}
"""
)
return result
def run_query8(session: Session) -> None:
"How many second-degree paths exist in the graph?"
query = """
MATCH (a:Person)-[r1:FOLLOWS]->(b:Person)-[r2:FOLLOWS]->(c:Person)
RETURN count(*) AS numPaths
"""
print(f"\nQuery 8:\n {query}")
response = session.run(query)
result = pl.from_dicts(response.data())
print(
f"""
Number of second-degree paths:\n{result}
"""
)
return result
def run_query9(session: Session, age_1: int, age_2: int) -> None:
"How many paths exist in the graph through persons below a certain age to persons above a certain age?"
query = """
MATCH (a:Person)-[r1:FOLLOWS]->(b:Person)-[r2:FOLLOWS]->(c:Person)
WHERE b.age < $age_1 AND c.age > $age_2
RETURN count(*) as numPaths
"""
print(f"\nQuery 9:\n {query}")
response = session.run(query, age_1=age_1, age_2=age_2)
result = pl.from_dicts(response.data())
print(
f"""
Number of paths through persons below {age_1} to persons above {age_2}:\n{result}
"""
)
return result
def main() -> None:
with GraphDatabase.driver(URI, auth=(NEO4J_USER, NEO4J_PASSWORD)) as driver:
with driver.session(database="neo4j") as session:
with Timer(name="queries", text="Neo4j query script completed in {:.6f}s"):
# fmt: off
_ = run_query1(session)
_ = run_query2(session)
_ = run_query3(session, country="United States")
_ = run_query4(session, age_lower=30, age_upper=40)
_ = run_query5(session, gender="male", city="London", country="United Kingdom", interest="fine dining")
_ = run_query6(session, gender="female", interest="tennis")
_ = run_query7(session, country="United States", age_lower=23, age_upper=30, interest="photography")
_ = run_query8(session)
_ = run_query9(session, age_1=50, age_2=25)
# fmt: on
if __name__ == "__main__":
main()