Skip to content

Commit

Permalink
scrape.js: Detect if GCI name is GitHub username
Browse files Browse the repository at this point in the history
This detects when a student's GCI display name is their GitHub
username. To ensure we don't get the wrong account, this first
verifies that the user has been active since the start of GCI. If they
have, we then check whether the user has created an issue or PR in the
organization's GitHub org. If so, we consider them the correct user.

Closes coala#32
  • Loading branch information
andrewda committed Dec 11, 2017
1 parent d4a8950 commit fe527a2
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 10 deletions.
92 changes: 84 additions & 8 deletions lib/scrape.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,17 @@ const CHAT_IMAGES = {
OTHER: 'static/images/chat.png',
}

// Options object handed to `fetch` for GitHub requests. Headers stay empty
// unless the GITHUB_TOKEN environment variable is set, in which case it is
// sent as a `token` Authorization header.
const GITHUB_OPTIONS = { headers: {} }
if (process.env.GITHUB_TOKEN) {
  GITHUB_OPTIONS.headers.Authorization = `token ${process.env.GITHUB_TOKEN}`
}

/**
 * Download the program record from the Code-in 2017 API.
 *
 * @returns {Promise<Object>} the parsed JSON body of the program endpoint
 */
async function fetchProgram() {
  const programUrl = 'https://codein.withgoogle.com/api/program/2017/'
  const response = await fetch(programUrl)
  const program = await response.json()
  return program
}

async function fetchOrgs() {
const res = await fetch(
'https://codein.withgoogle.com/api/program/2017/organization/?status=2'
Expand All @@ -33,17 +44,40 @@ async function fetchLeaders(id) {
}

/**
 * Search GitHub for organization accounts matching a query.
 *
 * @param {string} query - search text, interpolated as-is into the `q`
 *   parameter ahead of the `type:org` qualifier
 * @returns {Promise<Array>} the `items` array of the search response, or an
 *   empty array when the response carries none
 */
async function searchGitHubOrgs(query) {
  // NOTE(review): `query` is not URL-encoded here — confirm callers only pass
  // values that are safe to embed in a query string.
  const res = await fetch(
    `${GITHUB_API_BASE}/search/users?q=${query}%20type:org`,
    GITHUB_OPTIONS
  )
  const { items } = await res.json()
  return items || []
}

/**
 * Collect the link texts found on a user's github.com `created_commits`
 * page for a date range.
 *
 * The page HTML is scanned for bare `<a href="…">text</a>` anchors whose
 * href and text consist only of name characters; the text of each one is
 * returned. The caller treats every entry as an `owner/repo` name.
 *
 * @param {string} user - GitHub login whose page is requested
 * @param {string} from - range start, sent as the `from` query parameter
 * @param {string} to - range end, sent as the `to` query parameter
 * @returns {Promise<string[]>} matched link texts in document order
 *   (duplicates are kept)
 */
async function getGitHubUserCommits(user, from, to) {
  // Built per call because the `g` flag keeps `lastIndex` state between
  // `exec` calls. Name characters: letters, all ten digits, `_`, `.`, `-`,
  // plus the `/` separator.
  const commitPattern = /<a href="(?:[\w./-]+)">([\w./-]+)<\/a>/g
  const res = await fetch(
    `https://github.com/users/${encodeURIComponent(user)}/created_commits` +
      `?from=${encodeURIComponent(from)}&to=${encodeURIComponent(to)}`
  )
  const body = await res.text()

  const commits = []
  let match
  while ((match = commitPattern.exec(body)) !== null) {
    commits.push(match[1])
  }

  return commits
}

/**
 * Look up a user record through the GitHub API.
 *
 * @param {string} user - login to request
 * @returns {Promise<Object|undefined>} the parsed response body, or
 *   `undefined` when that body has a truthy `message` property
 */
async function getGitHubUser(user) {
  const reply = await fetch(`${GITHUB_API_BASE}/users/${user}`, GITHUB_OPTIONS)
  const payload = await reply.json()
  // A truthy `message` is treated as a failed lookup (presumably GitHub's
  // error payload shape — confirm against the API docs).
  const failed = Boolean(payload && payload.message)
  return failed ? undefined : payload
}

async function findOrganization({
name,
description,
Expand Down Expand Up @@ -91,6 +125,35 @@ async function findOrganization({
return null
}

/**
 * Resolve a participant to a GitHub login when their display name is itself
 * a GitHub username.
 *
 * The display name is accepted only when all of the following hold:
 *   1. it is 1-39 characters of letters, digits and hyphens;
 *   2. `getGitHubUser` returns a user for it;
 *   3. that user's `updated_at` is not earlier than the program's
 *      `competition_open_starts`;
 *   4. one of the entries returned by `getGitHubUserCommits` for the
 *      hard-coded 2017-11-28 to 2018-01-17 ranges has `org` as the part
 *      before the first `/`.
 *
 * @param {{display_name: string}} participant - only `display_name` is read
 * @param {string} org - presumably the organization's GitHub login; it is
 *   compared with the owner part of each `owner/repo` entry
 * @returns {Promise<string|undefined>} the user's login on a match,
 *   otherwise `undefined`
 */
async function findGitHubUser({ display_name }, org) {
  // A non-string name would otherwise be coerced (e.g. to "undefined") and
  // looked up as if it were a real username.
  if (!org || typeof display_name !== 'string') return

  const displayNamePattern = /^[a-zA-Z0-9-]{1,39}$/
  if (!displayNamePattern.test(display_name)) return

  const user = await getGitHubUser(display_name)
  if (!user) return

  const { login } = user
  const { competition_open_starts } = await fetchProgram()

  // Skip accounts not updated since the competition opened. An unparseable
  // date makes the comparison false, so such accounts are not skipped.
  if (new Date(user.updated_at) < new Date(competition_open_starts)) return

  // Hard-coded date ranges (presumably the GCI 2017 contest window, one
  // request per calendar month). The lookups are independent, so they run
  // concurrently.
  const ranges = [
    ['2017-11-28', '2017-11-30'],
    ['2017-12-01', '2017-12-31'],
    ['2018-01-01', '2018-01-17'],
  ]
  const repoLists = await Promise.all(
    ranges.map(([from, to]) => getGitHubUserCommits(login, from, to))
  )

  // GitHub logins are case-insensitive, so owners are compared that way too.
  const wantedOwner = String(org).toLowerCase()
  const matched = repoLists.some(repos =>
    repos.some(repo => repo.split('/')[0].toLowerCase() === wantedOwner)
  )

  return matched ? login : undefined
}

async function fetchOrgsWithData() {
const orgs = await fetchOrgs()
const fetchingLeaders = orgs.map(org => fetchLeaders(org.id))
Expand All @@ -100,17 +163,30 @@ async function fetchOrgsWithData() {
const orgGitHub = await Promise.all(fetchingGitHub)
const orgChats = await Promise.all(fetchingChat)

return orgs.map((org, index) =>
Object.assign(org, {
leaders: orgLeaders[index],
const fetchingAll = orgs.map(async (org, index) => {
const fetchingUsers = orgLeaders[index].map(user =>
findGitHubUser(user, orgGitHub[index])
)
const orgUsers = await Promise.all(fetchingUsers)

const leaders = orgLeaders[index].map((user, index) =>
Object.assign(user, {
github_account: orgUsers[index],
})
)

return Object.assign(org, {
leaders: leaders,
github: orgGitHub[index],
chat: {
url: orgChats[index].url,
platform: chattie.CHAT[orgChats[index].type],
image: CHAT_IMAGES[chattie.CHAT[orgChats[index].type]],
},
})
)
})

return await Promise.all(fetchingAll)
}

async function fetchDates() {
Expand Down
13 changes: 11 additions & 2 deletions templates/main.html
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ <h1>Google Code-in 2017 Current Leaders</h1>
</div>
<hr>
<i>
The leading participants for each organization are listed randomly.
The leading participants for each organization (and their GitHub accounts,
if applicable) are listed randomly.
</i>
<div class="orgs">
{{#withLeader}}
Expand Down Expand Up @@ -67,7 +68,15 @@ <h3>
<div class="org-leaderboard">
<ul>
{{#leaders}}
<li>{{display_name}}</li>
<li>
{{#github_account}}
<a href="https://github.com/{{github_account}}">
{{/github_account}}
{{display_name}}
{{#github_account}}
</a>
{{/github_account}}
</li>
{{/leaders}}
</ul>
</div>
Expand Down

0 comments on commit fe527a2

Please sign in to comment.