Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
sqs committed Dec 25, 2023
1 parent d86968b commit 15558b7
Show file tree
Hide file tree
Showing 8 changed files with 86 additions and 11 deletions.
15 changes: 12 additions & 3 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

59 changes: 59 additions & 0 deletions provider/docs/bin/docs-query.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import { readFile } from 'fs/promises'
import path from 'path'
import { createCorpus, Document } from '../src/corpus/corpus'

// Positional command-line arguments (everything after the node binary and the
// script path): the query comes first, followed by one or more corpus files.
const args = process.argv.slice(2)

const [query, ...corpusFiles] = args

const USAGE = `\nUsage: ${path.basename(process.argv[1])} <query> <corpus-files>`

/** Prints the error message and the usage text to stderr, then exits with status 1. */
function exitWithUsageError(message: string): never {
    console.error(message)
    console.error(USAGE)
    process.exit(1)
}

if (!query) {
    exitWithUsageError('Error: no query specified')
}
if (corpusFiles.length === 0) {
    exitWithUsageError('Error: no corpus files specified')
}

// Read every corpus file in parallel. Each document's ID is its 1-based
// position in `corpusFiles`.
const docs: Document[] = await Promise.all(
    corpusFiles.map(
        async (filePath, index): Promise<Document> => ({
            docID: index + 1,
            text: await readFile(filePath, 'utf8'),
        })
    )
)

// Build the corpus and run the query. Summary lines go to stderr; the results
// themselves go to stdout.
const corpus = createCorpus(docs)
const results = await corpus.search(query)
console.error(`# ${corpus.length} docs in corpus`)
console.error(`# Query: ${JSON.stringify(query)}`)
const MAX_RESULTS = 5
const shownNote = results.length > MAX_RESULTS ? ` (showing top ${MAX_RESULTS})` : ''
console.error(`# ${results.length} results${shownNote}`)
results.slice(0, MAX_RESULTS).forEach((result, rank) => {
    // docID is the 1-based index into corpusFiles, so map it back to the file path.
    const docFile = corpusFiles[result.docID - 1]
    // Separate consecutive results with a blank line.
    if (rank > 0) {
        console.log()
    }
    console.log(`#${rank + 1} [${result.score.toFixed(3)}] ${docFile}#chunk${result.chunk}`)
    const excerpt = result.excerpt.replace(/\n\n/g, '\n')
    console.log(indent(truncate(excerpt, 300), '\t'))
})

/**
 * Shortens `text` to at most `maxLength` UTF-16 code units, appending `...`
 * when anything was cut off. Text that already fits is returned unchanged.
 *
 * If the cut would fall between the two halves of a surrogate pair, the cut is
 * moved back by one code unit so the result never ends in a lone high
 * surrogate (which would render as a replacement character).
 *
 * @param text The text to shorten.
 * @param maxLength The maximum number of code units to keep (excluding the `...` suffix).
 * @returns The original text, or its truncated prefix followed by `...`.
 */
function truncate(text: string, maxLength: number): string {
    if (text.length <= maxLength) {
        return text
    }

    let end = maxLength
    const lastKept = text.charCodeAt(end - 1)
    const firstDropped = text.charCodeAt(end)
    const isHighSurrogate = lastKept >= 0xd800 && lastKept <= 0xdbff
    const isLowSurrogate = firstDropped >= 0xdc00 && firstDropped <= 0xdfff
    if (isHighSurrogate && isLowSurrogate) {
        // Don't split an astral character (e.g. an emoji) in half.
        end -= 1
    }
    return text.slice(0, end) + '...'
}

/**
 * Prefixes every line of `text` with the `indent` string.
 *
 * The prefix is inserted literally. Lines are split and re-joined rather than
 * using `String.prototype.replace` with a replacement string, because a
 * replacement string would interpret sequences such as `$&` or `$$` in the
 * prefix as special patterns.
 *
 * @param text The (possibly multi-line) text to indent.
 * @param indent The prefix to put in front of each line.
 * @returns The indented text, or the empty string if `text` is empty.
 */
function indent(text: string, indent: string): string {
    if (text === '') {
        return ''
    }
    return text
        .split('\n')
        .map(line => indent + line)
        .join('\n')
}
7 changes: 4 additions & 3 deletions provider/docs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
"directory": "provider/docs"
},
"type": "module",
"main": "dist/index.js",
"types": "dist/index.d.ts",
"main": "dist/src/index.js",
"types": "dist/src/index.d.ts",
"files": [
"dist",
"!**/*.test.*",
Expand All @@ -19,7 +19,8 @@
"sideEffects": false,
"scripts": {
"build": "tsc --build",
"test": "vitest"
"test": "vitest",
"docs-query": "node --no-warnings=ExperimentalWarning --experimental-specifier-resolution=node --loader ts-node/esm/transpile-only bin/docs-query.ts"
},
"dependencies": {
"@opencodegraph/provider": "workspace:*",
Expand Down
2 changes: 1 addition & 1 deletion provider/docs/src/corpus/corpus.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { multiSearch } from './search/multi'
* A documentation corpus.
*/
export interface Corpus {
search(query: string, keyword: boolean): Promise<CorpusSearchResult[]>
search(query: string): Promise<CorpusSearchResult[]>
length: number
}

Expand Down
3 changes: 2 additions & 1 deletion provider/docs/src/corpus/search/multi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ export async function multiSearch(storage: StoredCorpus, query: string): Promise
docResults.set(result.chunk, { ...chunkResult, score: chunkResult.score + result.score })
}

return Array.from(combinedResults.values()).flatMap(docResults => Array.from(docResults.values()))
const results = Array.from(combinedResults.values()).flatMap(docResults => Array.from(docResults.values()))
return results.toSorted((a, b) => b.score - a.score)
}

const SEARCH_METHODS: ((
Expand Down
2 changes: 1 addition & 1 deletion provider/docs/src/e2e.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ describe('e2e', () => {
const codeFile = await fs.readFile(path.join(__dirname, 'testdata/code/urlParsing.ts'), 'utf8')

const corpus = createCorpus([{ docID: 1, text: docFile }])
const results = await corpus.search(codeFile, false)
const results = await corpus.search(codeFile)
roundScores(results)
expect(results).toEqual<CorpusSearchResult[]>([
{
Expand Down
8 changes: 6 additions & 2 deletions provider/docs/tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@
"extends": "../../.config/tsconfig.base.json",
"compilerOptions": {
"module": "ESNext",
"rootDir": "src",
"rootDir": ".",
"outDir": "dist",
"lib": ["ESNext"],
},
"include": ["src"],
"ts-node": {
"esm": true,
"experimentalSpecifierResolution": "node",
},
"include": ["src", "bin"],
"exclude": ["dist", "src/testdata", "vitest.config.ts"],
"references": [{ "path": "../../lib/provider" }],
}
1 change: 1 addition & 0 deletions tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
{ "path": "client/vscode/test/integration" },
{ "path": "client/web-playground" },
{ "path": "provider/docs" },
{ "path": "provider/docs/bin" },
{ "path": "provider/hello-world" },
{ "path": "provider/links" },
{ "path": "provider/prometheus" },
Expand Down

0 comments on commit 15558b7

Please sign in to comment.