diff --git a/.coafile b/.coafile index b63c9ef..67eb3c3 100644 --- a/.coafile +++ b/.coafile @@ -14,6 +14,7 @@ bears = LineLengthBear [all.links] bears = InvalidLinkBear +link_ignore_regex = (github\.com|\{|\$) [js] files = lib/**/*.js, static/js/**/*.js diff --git a/lib/scrape.js b/lib/scrape.js index 584b272..9542abc 100644 --- a/lib/scrape.js +++ b/lib/scrape.js @@ -2,8 +2,10 @@ const fetch = require('node-fetch') const chattie = require('chattie') const fs = require('fs') const json2yaml = require('json2yaml') +const validUsername = require('valid-github-username') const GH_USER_BASE = 'https://github.com/users' +const GH_ORG_BASE = 'https://github.com/orgs' const GH_API_BASE = 'https://api.github.com' const GCI_API_BASE = 'https://codein.withgoogle.com/api' @@ -19,12 +21,23 @@ const CHAT_IMAGES = { OTHER: 'static/images/chat.png', } -const GITHUB_OPTIONS = { +const GH_API_OPTIONS = { headers: process.env.GITHUB_TOKEN ? { Authorization: `token ${process.env.GITHUB_TOKEN}` } : {}, } +const GH_WEB_OPTIONS = { + headers: { + Accept: 'text/html', + 'Accept-Encoding': 'utf8', + 'Accept-Language': 'en-US,en;q=0.9', + 'Upgrade-Insecure-Requests': '1', + 'User-Agent': 'Mozilla/5.0', + }, + compress: false, +} + async function fetchProgram() { const res = await fetch(`${GCI_API_BASE}/program/2017/`) return await res.json() @@ -45,7 +58,7 @@ async function fetchLeaders(id) { async function searchGitHubOrgs(query) { const res = await fetch( `${GH_API_BASE}/search/users?q=${query}%20type:org`, - GITHUB_OPTIONS + GH_API_OPTIONS ) const { items } = await res.json() return items || [] @@ -97,7 +110,7 @@ function findMatches(input, pattern) { } async function getGitHubUser(user) { - const res = await fetch(`${GH_API_BASE}/users/${user}`, GITHUB_OPTIONS) + const res = await fetch(`${GH_API_BASE}/users/${user}`, GH_API_OPTIONS) let response = await res.json() if (response && response.message) { response = undefined @@ -155,14 +168,17 @@ async function findOrganization({ async function findGitHubUser({ display_name }, org) { if (!org) return - display_name = display_name.replace(/ /g, '') + const shortName = validUsername(display_name) + + const username = await findGitHubUserInOrg(display_name, org) + if (username) return username const displayNamePattern = /^[a-zA-Z0-9-]{1,39}$/ - const displayNameMatches = displayNamePattern.exec(display_name) + const displayNameMatches = displayNamePattern.exec(shortName) if (!displayNameMatches) return - const user = await getGitHubUser(display_name) + const user = await getGitHubUser(shortName) if (!user) return const login = user.login @@ -183,6 +199,19 @@ async function findGitHubUser({ display_name }, org) { } } +async function findGitHubUserInOrg(user, org) { + const pattern = new RegExp( + '' + ) + const res = await fetch( + `${GH_ORG_BASE}/${org}/people?query=${user}`, + GH_WEB_OPTIONS + ) + const body = await res.text() + const match = pattern.exec(body) + return match ? match[1] : null +} + async function fetchOrgsWithData() { const orgs = await fetchOrgs() const fetchingLeaders = orgs.map(org => fetchLeaders(org.id)) diff --git a/package-lock.json b/package-lock.json index 8047c5a..7d80a34 100644 --- a/package-lock.json +++ b/package-lock.json @@ -957,7 +957,7 @@ "eslint-plugin-prettier": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/eslint-plugin-prettier/-/eslint-plugin-prettier-2.3.1.tgz", - "integrity": "sha1-56dGxn5xbzNSdLiClanq2fVE5E0=", + "integrity": "sha512-AV8shBlGN9tRZffj5v/f4uiQWlP3qiQ+lh+BhTqRLuKSyczx+HRWVkVZaf7dOmguxghAH1wftnou/JUEEChhGg==", "dev": true, "requires": { "fast-diff": "1.1.2", @@ -1086,7 +1086,7 @@ "fast-diff": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/fast-diff/-/fast-diff-1.1.2.tgz", - "integrity": "sha1-S2LEK44D3j+EhGC2OQeZIGldAVQ=", + "integrity": "sha512-KaJUt+M9t1qaIteSvjc6P3RbMdXsNhK61GRftR6SNxqmhthcd9MGIi4T+o0jD8LUSpSnSKXE20nLtJ3fOHxQig==", "dev": true }, "fast-json-stable-stringify": { @@ -1637,7 +1637,7 @@ "jest-docblock": { "version": "21.2.0", "resolved": "https://registry.npmjs.org/jest-docblock/-/jest-docblock-21.2.0.tgz", - "integrity": "sha1-UVKcOzDV/RWdpgwnzu3Blfr41BQ=", + "integrity": "sha512-5IZ7sY9dBAYSV+YjQ0Ovb540Ku7AO9Z5o2Cg789xj167iQuZ2cG+z0f3Uct6WeYLbU6aQiM2pCs7sZ+4dotydw==", "dev": true }, "js-base64": { @@ -3190,6 +3190,11 @@ "resolved": "https://registry.npmjs.org/uuid/-/uuid-3.1.0.tgz", "integrity": "sha512-DIWtzUkw04M4k3bf1IcpS2tngXEL26YUD2M0tMDUpnUrz2hgzUBlD55a4FjdLGPvfHxS6uluGWvaVEqgBcVa+g==" }, + "valid-github-username": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/valid-github-username/-/valid-github-username-0.0.1.tgz", + "integrity": "sha512-XRRFXAg8dzghGKVyVZzBDtylX1lKAP8jl5z4csfAHU8lylkw/b9s6rDlXipPFv+8IogVPM2p8lZhjNN+e30VHw==" + }, "validate-npm-package-license": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/validate-npm-package-license/-/validate-npm-package-license-3.0.1.tgz", diff --git a/package.json b/package.json index 708fd19..72964f1 100644 --- a/package.json +++ b/package.json @@ -14,7 +14,8 @@ "json2yaml": "^1.1.0", "mustache": "^2.3.0", "ncp": "^2.0.0", - "node-fetch": "^1.7.3" + "node-fetch": "^1.7.3", + "valid-github-username": "0.0.1" }, "devDependencies": { "csslint": "^1.0.5",