Skip to content

Commit

Permalink
Workflow to test docs.github.com links on github/github (github#21601)
Browse files Browse the repository at this point in the history
* run prettier

* removing files from test/integration

* update cron job to not start on the hour

* add github/github comment

* update comment

* updating to search by indices in the content rather than by line

* small updates and updating character max

* update name of script run

* updating to use api search code and get contents

* using api search code and get contents

* adding token check and .js

* remove blank line for title

* update issue body content

* update comment

* add support for GitHub.developer_help_url links
  • Loading branch information
gracepark authored Oct 1, 2021
1 parent 1e2fedd commit 96ff67d
Show file tree
Hide file tree
Showing 3 changed files with 259 additions and 0 deletions.
75 changes: 75 additions & 0 deletions .github/workflows/check-broken-links-github-github.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
name: Check Broken Docs Links in github/github

# **What it does**: This checks for any broken docs.github.com links in github/github
# **Why we have it**: Make sure all docs in github/github are up to date
# **Who does it impact**: Docs engineering, people on GitHub

on:
  workflow_dispatch:
  schedule:
    - cron: '20 13 * * 1' # run every Monday at 1:20PM UTC

# **IMPORTANT:** Do not change the FREEZE environment variable set here!
# This workflow runs on a recurring basis. To temporarily disable it (e.g.,
# during a docs deployment freeze), add an Actions Secret to the repo settings
# called `FREEZE` with a value of `true`. To re-enable workflow, simply
# delete that Secret from the repo settings. The environment variable here
# will duplicate that Secret's value for later evaluation.
env:
  FREEZE: ${{ secrets.FREEZE }}

jobs:
  check_github_github_links:
    if: github.repository == 'github/docs-internal'
    runs-on: ubuntu-latest
    env:
      # need to use a token from a user with access to github/github for this step
      GITHUB_TOKEN: ${{ secrets.DOCS_BOT_FR }}
      FIRST_RESPONDER_PROJECT: Docs content first responder
      REPORT_AUTHOR: docubot
      REPORT_LABEL: github github broken link report
      REPORT_REPOSITORY: github/docs-content
    steps:
      - if: ${{ env.FREEZE == 'true' }}
        run: |
          echo 'The repo is currently frozen! Exiting this workflow.'
          exit 1 # prevents further steps from running
      - name: Checkout
        uses: actions/checkout@5a4ac9002d0be2fb38bd78e4b4dbde5606d7042f

      - name: Setup Node
        uses: actions/setup-node@38d90ce44d5275ad62cc48384b3d8a58c500bb5f
        with:
          node-version: 16.8.x
          cache: npm

      - name: Install Node.js dependencies
        run: npm ci

      - name: Run broken github/github link check
        # The id lets the reporting steps below check this specific step's outcome.
        id: check_links
        run: |
          script/check-github-github-links.js > broken_github_github_links.md
      # check-github-github-links.js returns 0 if no links are broken, and 1 if any links
      # are broken. When an Actions step's exit code is 1, the action run's job status
      # is failure and the run ends. The following steps create an issue for the
      # broken link report only if any links are broken, so `failure()` ensures the
      # steps run despite the previous step's failure of the job.
      #
      # `failure()` alone is also true when an EARLIER step failed (the FREEZE guard,
      # checkout, setup or `npm ci`), in which case no report file exists. The extra
      # `steps.check_links.outcome == 'failure'` condition restricts reporting to
      # failures of the link check itself.
      #
      # https://docs.github.com/actions/reference/context-and-expression-syntax-for-github-actions#job-status-check-functions

      - if: ${{ failure() && steps.check_links.outcome == 'failure' }}
        name: Get title for issue
        id: check
        run: echo "::set-output name=title::$(head -1 broken_github_github_links.md)"
      - if: ${{ failure() && steps.check_links.outcome == 'failure' }}
        name: Create issue from file
        id: github-github-broken-link-report
        uses: peter-evans/create-issue-from-file@b4f9ee0a9d4abbfc6986601d9b1a4f8f8e74c77e
        with:
          token: ${{ env.GITHUB_TOKEN }}
          title: ${{ steps.check.outputs.title }}
          content-filepath: ./broken_github_github_links.md
          repository: ${{ env.REPORT_REPOSITORY }}
          labels: ${{ env.REPORT_LABEL }}
141 changes: 141 additions & 0 deletions script/check-github-github-links.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
#!/usr/bin/env node

// [start-readme]
//
// Run this script to get all broken docs.github.com links in github/github
//
// [end-readme]

import { getContents, getPathsWithMatchingStrings } from './helpers/git-utils.js'
import got from 'got'

// Only start the link check when a GitHub API token is available in the
// environment; otherwise report the problem on stderr and exit with status 1.
if (process.env.GITHUB_TOKEN) {
  main()
} else {
  console.error('Error! You must have a GITHUB_TOKEN set in an .env file to run this script.')
  process.exit(1)
}

/**
 * Finds docs.github.com links referenced in github/github, requests each one,
 * and prints a report of the links whose request failed.
 *
 * Exits the process with status 0 when every link resolves and with status 1
 * when at least one link is broken (the report is written to stdout).
 */
async function main() {
  // NOTE(review): the search terms use a space ('GitHub help_url') while the
  // content scan below looks for 'GitHub.help_url' — presumably because of how
  // code search tokenizes punctuation; confirm before changing either.
  const searchStrings = ['https://docs.github.com', 'GitHub help_url', 'GitHub developer_help_url']
  const foundFiles = await getPathsWithMatchingStrings(searchStrings, 'github', 'github')
  const searchFiles = [...foundFiles]
    .filter((file) => endsWithAny(['.rb', '.yml', '.yaml', '.txt', '.pdf', '.erb', '.js'], file))
    .filter(
      (file) =>
        !file.includes('test/') &&
        !file.includes('app/views/') &&
        !file.includes('config.') &&
        !file.includes('app/api/description/')
    )

  // Collected as [url, filePath] pairs.
  const docsLinksFiles = []
  const urlRegEx =
    /https?:\/\/(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&//=]*)/g

  for (const file of searchFiles) {
    // Assumes getContents resolves to the file's text as a string — it is used
    // with string methods below.
    const contents = await getContents('github', 'github', 'master', file)

    for (const numIndex of getIndicesOf('https://docs.github.com', contents)) {
      // Assuming we don't have links close to 500 characters long
      const docsLink = contents.substring(numIndex, numIndex + 500).match(urlRegEx)
      // match() returns null when nothing matches; skip instead of throwing.
      if (!docsLink) continue
      docsLinksFiles.push([docsLink[0].toString().replace(/[^a-zA-Z0-9]*$|\\n$/g, ''), file])
    }

    const helpIndices = [
      ...getIndicesOf('GitHub.help_url', contents),
      ...getIndicesOf('GitHub.developer_help_url', contents),
    ]

    for (const numIndex of helpIndices) {
      // There are certain links like #{GitHub.help_url}#{learn_more_path} and #{GitHub.developer_help_url}#{learn_more_path} that we should skip
      const isPlainHelpUrl =
        (contents.substring(numIndex, numIndex + 11) === 'GitHub.help' &&
          contents.charAt(numIndex + 16) !== '#') ||
        (contents.substring(numIndex, numIndex + 16) === 'GitHub.developer' &&
          contents.charAt(numIndex + 26) !== '#')
      if (!isPlainHelpUrl) continue

      // Looking for the closest '/' after GitHub.developer_help_url or GitHub.help_url
      // There are certain links that don't start with `/` so we want to skip those.
      // If there's no `/` within 30 characters of GitHub.help_url/GitHub.developer_help_url, skip
      const startSearchIndex = contents.indexOf('/', numIndex)
      // indexOf returns -1 when there is no '/' at all. Without this guard the
      // distance check below would pass (negative < 30) and substring(-1, ...)
      // would slice from the beginning of the file.
      if (startSearchIndex === -1) continue
      if (startSearchIndex - numIndex >= 30) continue

      const endSearchIndex = regexIndexOf(
        contents,
        /\n|"\)|{@email_tracking_params}|\^http|Ahttps|example|This|TODO"|[{}|"%><.,')* ]/,
        startSearchIndex + 1
      )
      // When no terminator follows, the path runs to the end of the file. A raw
      // -1 would make substring() swap its arguments and return the file prefix.
      const pathEnd = endSearchIndex === -1 ? contents.length : endSearchIndex
      const helpLink =
        'https://docs.github.com' + contents.substring(startSearchIndex, pathEnd).trim()
      docsLinksFiles.push([helpLink, file])
    }
  }

  // Any failed request (non-2xx status, network error, ...) counts as broken.
  const brokenLinks = []
  await Promise.all(
    docsLinksFiles.map(async (file) => {
      try {
        await got(file[0])
      } catch {
        brokenLinks.push(file)
      }
    })
  )
  if (!brokenLinks.length) {
    console.log('All links are good!')
    process.exit(0)
  }

  // The first line doubles as the issue title in the workflow that consumes
  // this output, so keep it on a single line.
  console.log(`Found ${brokenLinks.length} total broken links in github/github`)
  console.log('```')

  console.log(`${JSON.stringify([...brokenLinks], null, 2)}`)

  console.log('```')
  // Exit unsuccessfully if broken links are found.
  process.exit(1)
}

/**
 * Reports whether `string` ends with at least one of the given suffixes.
 *
 * @param {Iterable<string>} suffixes - Candidate endings to test.
 * @param {string} string - The text to inspect.
 * @returns {boolean} true when any suffix matches, otherwise false.
 */
function endsWithAny(suffixes, string) {
  return Array.from(suffixes).some((ending) => string.endsWith(ending))
}

/**
 * Collects the start offsets of every non-overlapping occurrence of
 * `searchString` inside `string`, scanning left to right.
 *
 * @param {string} searchString - The text to look for.
 * @param {string} string - The text to search in.
 * @returns {number[]} Offsets in ascending order; empty when `searchString`
 *   is empty or never occurs.
 */
function getIndicesOf(searchString, string) {
  const found = []
  const step = searchString.length
  // An empty needle would match everywhere, so it is treated as "no matches".
  if (step === 0) return found

  for (
    let position = string.indexOf(searchString, 0);
    position > -1;
    // Resume after the whole match so occurrences never overlap.
    position = string.indexOf(searchString, position + step)
  ) {
    found.push(position)
  }

  return found
}

/**
 * Like String.prototype.search, but only considers text at or after `startPos`.
 *
 * @param {string} string - The text to search in.
 * @param {RegExp} regex - The pattern to look for.
 * @param {number} [startPos] - Offset to start from; a missing or falsy value means 0.
 * @returns {number} Offset of the first match measured from the start of
 *   `string`, or -1 when there is no match.
 */
function regexIndexOf(string, regex, startPos) {
  const offset = startPos || 0
  const relativeIndex = string.substring(offset).search(regex)

  if (relativeIndex < 0) {
    return relativeIndex
  }

  // Translate the position within the tail back to a position in `string`.
  return relativeIndex + offset
}
43 changes: 43 additions & 0 deletions script/helpers/git-utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -126,3 +126,46 @@ export async function createIssueComment(owner, repo, pullNumber, body) {
throw err
}
}

/**
 * Runs a code search for each string in `strArr` within `org/repo` and
 * collects the paths of all files reported as matching.
 *
 * @param {string[]} strArr - Strings to search for, one search per entry.
 * @param {string} org - Owner of the repository to search.
 * @param {string} repo - Name of the repository to search.
 * @returns {Promise<Set<string>>} Unique file paths across all searches.
 * @throws Rethrows any error raised by the underlying search after logging it.
 */
export async function getPathsWithMatchingStrings(strArr, org, repo) {
  const perPage = 100
  const paths = new Set()

  for (const str of strArr) {
    try {
      const q = `q=${str}+in:file+repo:${org}/${repo}`
      let currentPage = 1
      let totalCount = 0
      let currentCount = 0

      // Page through the results until as many items as the reported total
      // have been seen.
      do {
        const data = await searchCode(q, perPage, currentPage)
        // An empty page means there is nothing more to fetch. Without this
        // guard the loop would never end when the reported total is larger
        // than the number of items the API actually hands back.
        // NOTE(review): GitHub's search API is documented to cap results per
        // query; requesting pages beyond that cap may raise instead — confirm.
        if (data.items.length === 0) break

        for (const el of data.items) {
          paths.add(el.path)
        }
        totalCount = data.total_count
        currentCount += data.items.length
        currentPage++
      } while (currentCount < totalCount)
    } catch (err) {
      console.log(`error searching for ${str} in ${org}/${repo}`)
      throw err
    }
  }

  return paths
}

/**
 * Fetches a single page of code-search results through the module's `github`
 * client.
 *
 * @param {string} q - The search query.
 * @param {number} perPage - Number of results requested per page.
 * @param {number} currentPage - Page number to fetch.
 * @returns {Promise<object>} The `data` property of the client's response.
 * @throws Rethrows the client's error after logging the failing query.
 */
async function searchCode(q, perPage, currentPage) {
  const request = { q, per_page: perPage, page: currentPage }

  try {
    const response = await github.rest.search.code(request)
    return response.data
  } catch (err) {
    console.log(`error searching for ${q} in code`)
    throw err
  }
}

0 comments on commit 96ff67d

Please sign in to comment.