Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scrape.js: Search org for user #75

Merged
merged 1 commit into from
Dec 12, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .coafile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ bears = LineLengthBear

[all.links]
bears = InvalidLinkBear
link_ignore_regex = (github\.com|\{|\$)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's also link_ignore_list, which uses globbing instead of a regex:
link_ignore_list = https://github.com/**

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

^ Optional.


[js]
files = lib/**/*.js, static/js/**/*.js
Expand Down
41 changes: 35 additions & 6 deletions lib/scrape.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ const fetch = require('node-fetch')
const chattie = require('chattie')
const fs = require('fs')
const json2yaml = require('json2yaml')
const validUsername = require('valid-github-username')

// Base URLs for the remote services this scraper talks to.

// GitHub web base for user pages.
// NOTE(review): no use of this constant is visible in this excerpt — confirm it is still needed.
const GH_USER_BASE = 'https://github.com/users'
// GitHub web base for organization pages; used to build the org "people" search URL.
const GH_ORG_BASE = 'https://github.com/orgs'
// GitHub REST API base; used for the /search/users and /users/:login requests.
const GH_API_BASE = 'https://api.github.com'
// Google Code-in API base; used for the /program/2017/ request.
const GCI_API_BASE = 'https://codein.withgoogle.com/api'

Expand All @@ -19,12 +21,23 @@ const CHAT_IMAGES = {
OTHER: 'static/images/chat.png',
}

const GITHUB_OPTIONS = {
// Options passed to fetch() for GitHub API requests.
// When the GITHUB_TOKEN environment variable is set (and non-empty), requests
// carry a token Authorization header; otherwise no extra headers are sent.
const GH_API_OPTIONS = {
  headers: (() => {
    const token = process.env.GITHUB_TOKEN
    if (!token) {
      return {}
    }
    return { Authorization: `token ${token}` }
  })(),
}

// Options passed to fetch() when requesting GitHub's HTML pages (as opposed to
// the JSON API). The headers present the request as an English-language
// browser asking for HTML.
const GH_WEB_OPTIONS = {
  headers: {
    'Accept': 'text/html',
    // NOTE(review): 'utf8' is a charset name, not a registered content-coding;
    // presumably it is here to discourage compressed responses — confirm.
    'Accept-Encoding': 'utf8',
    'Accept-Language': 'en-US,en;q=0.9',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0',
  },
  // Fetch option (not a header): turn off the client's compression support.
  compress: false,
}

async function fetchProgram() {
const res = await fetch(`${GCI_API_BASE}/program/2017/`)
return await res.json()
Expand All @@ -45,7 +58,7 @@ async function fetchLeaders(id) {
async function searchGitHubOrgs(query) {
const res = await fetch(
`${GH_API_BASE}/search/users?q=${query}%20type:org`,
GITHUB_OPTIONS
GH_API_OPTIONS
)
const { items } = await res.json()
return items || []
Expand Down Expand Up @@ -97,7 +110,7 @@ function findMatches(input, pattern) {
}

async function getGitHubUser(user) {
const res = await fetch(`${GH_API_BASE}/users/${user}`, GITHUB_OPTIONS)
const res = await fetch(`${GH_API_BASE}/users/${user}`, GH_API_OPTIONS)
let response = await res.json()
if (response && response.message) {
response = undefined
Expand Down Expand Up @@ -155,14 +168,17 @@ async function findOrganization({
async function findGitHubUser({ display_name }, org) {
if (!org) return

display_name = display_name.replace(/ /g, '')
const shortName = validUsername(display_name)

const username = await findGitHubUserInOrg(display_name, org)
if (username) return username

const displayNamePattern = /^[a-zA-Z0-9-]{1,39}$/

const displayNameMatches = displayNamePattern.exec(display_name)
const displayNameMatches = displayNamePattern.exec(shortName)
if (!displayNameMatches) return

const user = await getGitHubUser(display_name)
const user = await getGitHubUser(shortName)
if (!user) return

const login = user.login
Expand All @@ -183,6 +199,19 @@ async function findGitHubUser({ display_name }, org) {
}
}

/**
 * Looks a person up on an organization's GitHub "people" page and returns the
 * login of the first member listed for the search.
 *
 * The page is fetched as HTML and the login is scraped from the first member
 * link found in the markup.
 *
 * @param {string} user - Free-form search text (e.g. a display name). It is
 *   percent-encoded before being placed in the query string, so spaces, `&`,
 *   `#` and non-ASCII characters are safe.
 * @param {string} org - GitHub organization login whose members are searched.
 * @returns {Promise<?string>} The matched GitHub login, or `null` when the
 *   request is not successful or no member link is present in the page.
 */
async function findGitHubUserInOrg(user, org) {
  // Member links on the people page; the capture group is the GitHub login.
  const pattern = /<a class="css-truncate-target f4" href="\/([a-zA-Z0-9-]{1,39})">/

  // Encode both dynamic parts: a raw display name such as "Jane Doe & co"
  // would otherwise truncate or corrupt the query string.
  const url =
    `${GH_ORG_BASE}/${encodeURIComponent(org)}` +
    `/people?query=${encodeURIComponent(user)}`

  const res = await fetch(url, GH_WEB_OPTIONS)
  // Error pages (unknown org, rate limiting, ...) are not a people listing.
  if (!res.ok) return null

  const body = await res.text()
  const match = pattern.exec(body)
  return match ? match[1] : null
}

async function fetchOrgsWithData() {
const orgs = await fetchOrgs()
const fetchingLeaders = orgs.map(org => fetchLeaders(org.id))
Expand Down
11 changes: 8 additions & 3 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
"json2yaml": "^1.1.0",
"mustache": "^2.3.0",
"ncp": "^2.0.0",
"node-fetch": "^1.7.3"
"node-fetch": "^1.7.3",
"valid-github-username": "0.0.1"
},
"devDependencies": {
"csslint": "^1.0.5",
Expand Down