diff --git a/package-lock.json b/package-lock.json index 1449b84d1..ce0778cb1 100644 --- a/package-lock.json +++ b/package-lock.json @@ -46,7 +46,6 @@ "multiformats": "^9.4.8", "pako": "^1.0.11", "prompts": "^2.4.1", - "readable-stream": "^3.6.0", "resource-counter": "^1.2.4", "sodium-native": "^3.4.1", "threads": "^1.6.5" diff --git a/package.json b/package.json index a34bd692c..5755a8bb0 100644 --- a/package.json +++ b/package.json @@ -102,7 +102,6 @@ "multiformats": "^9.4.8", "pako": "^1.0.11", "prompts": "^2.4.1", - "readable-stream": "^3.6.0", "resource-counter": "^1.2.4", "sodium-native": "^3.4.1", "threads": "^1.6.5" diff --git a/src/git/http.ts b/src/git/http.ts new file mode 100644 index 000000000..1f1b7d6b8 --- /dev/null +++ b/src/git/http.ts @@ -0,0 +1,425 @@ +import type { + CapabilityList, + Reference, + ObjectId, + ObjectIdList, +} from './types'; +import type { EncryptedFS } from 'encryptedfs'; +import { Buffer } from 'buffer'; +import git from 'isomorphic-git'; +import * as gitUtils from './utils'; +import * as utils from '../utils'; + +/** + * Reference discovery + * Notes: + * + * Server SHOULD terminate each non-flush line using LF ("\n") terminator; + * client MUST NOT complain if there is no terminator. + * + * The returned response is a pkt-line stream describing each ref and its current value. + * The stream MUST be sorted by name according to the C locale ordering. + * + * If HEAD is a valid ref, HEAD MUST appear as the first advertised ref. + * If HEAD is not a valid ref, HEAD MUST NOT appear in the advertisement list at all, but other refs may still appear. + * + * The stream MUST include capability declarations behind a NUL on the first ref. + * The peeled value of a ref (that is "ref^{}") MUST be immediately after the ref itself, if presented. + * A conforming server MUST peel the ref if it’s an annotated tag. 
+ *
+ * advertised-refs = (no-refs / list-of-refs)
+ * *shallow
+ * flush-pkt
+ *
+ * no-refs = PKT-LINE(zero-id SP "capabilities^{}"
+ * NUL capability-list LF)
+ *
+ * list-of-refs = first-ref *other-ref
+ * first-ref = PKT-LINE(obj-id SP refname
+ * NUL capability-list LF)
+ *
+ * other-ref = PKT-LINE(other-tip / other-peeled)
+ * other-tip = obj-id SP refname LF
+ * other-peeled = obj-id SP refname "^{}" LF
+ *
+ * shallow = PKT-LINE("shallow" SP obj-id)
+ *
+ * capability-list = capability *(SP capability)
+ * capability = 1*(LC_ALPHA / DIGIT / "-" / "_")
+ * LC_ALPHA = %x61-7A
+ */
+
+/*
+ * Smart ref discovery response looks like
+ *
+ * ```
+ * S: 200 OK
+ * S: Content-Type: application/x-git-upload-pack-advertisement
+ * S: Cache-Control: no-cache
+ * S:
+ * S: 001e# service=git-upload-pack\n
+ * S: 0000
+ * S: 004895dcfa3633004da0049d3d0fa03f80589cbcaf31 refs/heads/maint\0multi_ack\n
+ * S: 003fd049f6c27a2244e12041955e262a404c7faba355 refs/heads/master\n
+ * S: 003c2cb58b79488a98d2721cea644875a8dd0026b115 refs/tags/v1.0\n
+ * S: 003fa3c2e2402b99163d1d59756e5f207ae21cccba4c refs/tags/v1.0^{}\n
+ * S: 0000
+ * ```
+ *
+ * ```
+ * smart_reply = PKT-LINE("# service=$servicename" LF)
+ * "0000"
+ * *1("version 1")
+ * ref_list
+ * "0000"
+ * ref_list = empty_list / non_empty_list
+ * empty_list = PKT-LINE(zero-id SP "capabilities^{}" NUL cap-list LF)
+ * non_empty_list = PKT-LINE(obj-id SP name NUL cap_list LF)
+ * *ref_record
+ * cap-list = capability *(SP capability)
+ * capability = 1*(LC_ALPHA / DIGIT / "-" / "_")
+ * LC_ALPHA = %x61-7A
+ * ref_record = any_ref / peeled_ref
+ * any_ref = PKT-LINE(obj-id SP name LF)
+ * peeled_ref = PKT-LINE(obj-id SP name LF)
+ * PKT-LINE(obj-id SP name "^{}" LF
+ * NUL = %x00
+ * zero-id = 40*"0"
+ * obj-id = 40*(HEXDIGIT)
+ * ```
+ */
+
+/**
+ * This is the main method for generating the smart HTTP response for the reference discovery phase.
+ * The server advertises the available references.
+ *
+ * Servers MUST terminate the response with the magic 0000 end pkt-line marker.
+ *
+ * The returned response is a pkt-line stream describing each ref and its known value. The stream SHOULD be sorted by
+ * name according to the C locale ordering. The stream SHOULD include the default ref named HEAD as the first ref.
+ * The stream MUST include capability declarations behind a NUL on the first ref.
+ *
+ * ```
+ * Smart_reply = PKT-LINE("# service=$servicename" LF)
+ * "0000"
+ * *1("version 1")
+ * ref_list
+ * "0000"
+ * ```
+ *
+ * `referenceListGenerator` is called for generating the `ref_list` stage.
+ */
+async function* advertiseRefGenerator({
+  efs,
+  dir,
+  gitDir,
+}: {
+  efs: EncryptedFS;
+  dir: string;
+  gitDir: string;
+}): AsyncGenerator<Buffer, void, void> {
+  // Providing side-band-64k, symref for the HEAD and agent name capabilities
+  const capabilityList = [
+    gitUtils.SIDE_BAND_64_CAPABILITY,
+    await gitUtils.referenceCapability({
+      efs: efs,
+      dir,
+      gitDir,
+      reference: gitUtils.HEAD_REFERENCE,
+    }),
+    gitUtils.AGENT_CAPABILITY,
+  ];
+  const objectGenerator = gitUtils.listReferencesGenerator({
+    efs,
+    dir,
+    gitDir,
+  });
+
+  // PKT-LINE("# service=$servicename" LF)
+  yield packetLineBuffer(gitUtils.REFERENCE_DISCOVERY_HEADER);
+  // "0000"
+  yield gitUtils.FLUSH_PACKET_BUFFER;
+  // Ref_list
+  yield* referenceListGenerator(objectGenerator, capabilityList);
+  // "0000"
+  yield gitUtils.FLUSH_PACKET_BUFFER;
+}
+
+/**
+ * Generates `Ref_list` lines from resolved references streamed from the `objectGenerator`.
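+ * For example, take the first ref line in the sample response above: its payload
+ * `95dcfa3633004da0049d3d0fa03f80589cbcaf31 refs/heads/maint\0multi_ack\n` is
+ * 40 + 1 + 16 + 1 + 9 + 1 = 68 bytes, so with the 4 length bytes added the pkt-len is
+ * 72 = 0x48, which is why that line begins with `0048`.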
+ * This is called by `advertiseRefGenerator` for generating each reference line in the reference discovery response.
+ *
+ * ```
+ * Ref_list = empty_list / non_empty_list
+ * empty_list = PKT-LINE(zero-id SP "capabilities^{}" NUL cap-list LF)
+ * non_empty_list = PKT-LINE(obj-id SP name NUL cap_list LF)
+ * *ref_record
+ * ref_record = any_ref / peeled_ref
+ * any_ref = PKT-LINE(obj-id SP name LF)
+ * peeled_ref = PKT-LINE(obj-id SP name LF)
+ * PKT-LINE(obj-id SP name "^{}" LF
+ * cap-list = capability *(SP capability)
+ * ```
+ */
+async function* referenceListGenerator(
+  objectGenerator: AsyncGenerator<[Reference, ObjectId], void, void>,
+  capabilities: CapabilityList,
+): AsyncGenerator<Buffer, void, void> {
+  // Cap-list = capability *(SP capability)
+  const capabilitiesListBuffer = Buffer.from(
+    capabilities.join(gitUtils.SPACE_STRING),
+  );
+  // Ref_list = empty_list / non_empty_list
+  // Non_empty_list = PKT-LINE(obj-id SP name NUL cap_list LF)
+  // *ref_record
+  let first = true;
+  for await (const [name, objectId] of objectGenerator) {
+    if (first) {
+      // PKT-LINE(obj-id SP name NUL cap_list LF)
+      yield packetLineBuffer(
+        Buffer.concat([
+          Buffer.from(objectId),
+          gitUtils.SPACE_BUFFER,
+          Buffer.from(name),
+          gitUtils.NULL_BUFFER,
+          capabilitiesListBuffer,
+          gitUtils.LINE_FEED_BUFFER,
+        ]),
+      );
+      first = false;
+    } else {
+      // PKT-LINE(obj-id SP name LF)
+      yield packetLineBuffer(
+        Buffer.concat([
+          Buffer.from(objectId),
+          gitUtils.SPACE_BUFFER,
+          Buffer.from(name),
+          gitUtils.LINE_FEED_BUFFER,
+        ]),
+      );
+    }
+  }
+  if (first) {
+    // If we yielded no objects then we need to yield the empty list
+    // Empty_list = PKT-LINE(zero-id SP "capabilities^{}" NUL cap-list LF)
+    yield packetLineBuffer(
+      Buffer.concat([
+        gitUtils.ZERO_ID_BUFFER,
+        gitUtils.SPACE_BUFFER,
+        gitUtils.EMPTY_LIST_CAPABILITIES_BUFFER,
+        gitUtils.NULL_BUFFER,
+        capabilitiesListBuffer,
+        gitUtils.LINE_FEED_BUFFER,
+      ]),
+    );
+  }
+}
+
+/**
+ * This will take a raw line and encode it as the pkt-line format.
+ * It adds a 4 byte length indicator to the beginning of a line.
+ * If a channel is specified, a channel byte is appended just after the length indicator.
+ *
+ * ```
+ * pkt-line = data-pkt / flush-pkt
+ * data-pkt = pkt-len pkt-payload
+ * pkt-len = 4*(HEXDIG)
+ * pkt-payload = (pkt-len - 4)*(OCTET)
+ * ```
+ */
+function packetLineBuffer(line: Buffer, channel?: 1 | 2 | 3): Buffer {
+  let lineLength = line.byteLength;
+  if (channel != null) {
+    // Adding channel byte to length
+    lineLength += 1;
+    const channelByte = Buffer.from([channel]);
+    return Buffer.concat([paddedLengthBuffer(lineLength), channelByte, line]);
+  } else {
+    return Buffer.concat([paddedLengthBuffer(lineLength), line]);
+  }
+}
+
+/**
+ * Creates a 4 byte length delimiter.
+ * It is formatted as a left padded hex number of the length.
+ *
+ * ```
+ * data-pkt = pkt-len pkt-payload
+ * pkt-len = 4*(HEXDIG)
+ * ```
+ */
+function paddedLengthBuffer(length: number) {
+  // Hex formatted length as a string, add 4 to account for the length string
+  const lengthBuffer = Buffer.from((length + 4).toString(16));
+  // Left pad 4 bytes
+  return Buffer.concat([
+    Buffer.alloc(4 - lengthBuffer.byteLength, '0'),
+    lengthBuffer,
+  ]);
+}
+
+/**
+ * This parses the client's request into a list of 'wants', 'haves', and capabilities.
+ * 'wants' indicate objects that the client found and wants from the reference discovery phase. Generally this will be
+ * a list of objects that references point to. It will not include all objects within that reference's branch.
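+ * For example, a single want line is itself a pkt-line: the payload
+ * `want 95dcfa3633004da0049d3d0fa03f80589cbcaf31\n` (reusing an object id from the
+ * discovery example above) is 46 bytes, so it is sent as
+ * `0032want 95dcfa3633004da0049d3d0fa03f80589cbcaf31\n`, since 46 + 4 = 50 = 0x32.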
+ * 'haves' indicate objects that the client has and doesn't need sent over by the server. It's used by the server to
+ * decide which objects to send.
+ * `capabilities` is a list of features the client wants. In our simplified implementation, none of these are really
+ * used since we default to just handling `side-band-64k` for sending data. In the future we'll have to support the
+ * capability for the client to push data.
+ *
+ * Clients MUST NOT reuse or revalidate a cached response. Servers MUST include sufficient Cache-Control headers to
+ * prevent caching of the response.
+ *
+ * Servers SHOULD support all capabilities defined here.
+ *
+ * Clients MUST send at least one "want" command in the request body. Clients MUST NOT reference an id in a "want"
+ * command which did not appear in the response obtained through ref discovery unless the server advertises capability
+ * allow-tip-sha1-in-want or allow-reachable-sha1-in-want.
+ *
+ * ```
+ * compute_request = want_list
+ * have_list
+ * request_end
+ * request_end = "0000" / "done"
+ * want_list = PKT-LINE(want SP cap_list LF)
+ * *(want_pkt)
+ * want_pkt = PKT-LINE(want LF)
+ * want = "want" SP id
+ * cap_list = capability *(SP capability)
+ * have_list = *PKT-LINE("have" SP id LF)
+ * ```
+ *
+ * @returns [wants, haves, capabilities]
+ */
+async function parsePackRequest(
+  body: Array<Buffer>,
+): Promise<[ObjectIdList, ObjectIdList, CapabilityList]> {
+  let workingBuffer = Buffer.alloc(0, 0);
+  const wants: Array<ObjectId> = [];
+  const haves: Array<ObjectId> = [];
+  const capabilities: CapabilityList = [];
+  for (const bodyElement of body) {
+    workingBuffer = Buffer.concat([workingBuffer, bodyElement]);
+    let firstLine = true;
+    while (true) {
+      const parsedData = gitUtils.parseRequestLine(workingBuffer);
+      if (parsedData == null) break;
+      const [type, objectId, parsedCapabilities, rest] = parsedData;
+      workingBuffer = rest;
+      if (firstLine) {
+        capabilities.push(...parsedCapabilities);
+        firstLine = false;
+      }
+      switch (type) {
+        case 'want':
+          wants.push(objectId);
+          break;
+        case 'have':
+          haves.push(objectId);
+          break;
+        case 'SEPARATOR':
+          break;
+        case 'done':
+          return [wants, haves, capabilities];
+        default:
+          utils.never(
+            `Type should be either 'want' or 'have', found '${type}'`,
+          );
+      }
+    }
+  }
+  return [wants, haves, capabilities];
+}
+
+/**
+ * This is the main method for handling the packfile-send stage of the HTTP protocol.
+ * It parses the HTTP body sent by the client into a list of `wants` and `haves` using `parsePackRequest`. It then
+ * uses these lists to walk the git data structures to decide which objects to send back to the client.
+ * It does this by using `listObjects` to get all the relevant objects and `generatePackData` to generate the packfile
+ * part of the response.
+ *
+ * It will respond with the `PKT-LINE(NAK_BUFFER)` and then the `packFile` data chunked into lines for the stream.
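+ * As a rough illustration of the framing (payload sizes elided), the response stream looks like:
+ *
+ * ```
+ * 0008NAK\n
+ * <pkt-len><0x01><packfile chunk> (repeated for each chunk)
+ * <pkt-len><0x02><progress message>
+ * 0000
+ * ```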
+ *
+ */
+async function* generatePackRequest({
+  efs,
+  dir,
+  gitDir,
+  body,
+}: {
+  efs: EncryptedFS;
+  dir: string;
+  gitDir: string;
+  body: Array<Buffer>;
+}): AsyncGenerator<Buffer, void, void> {
+  const [wants, haves, _capabilities] = await parsePackRequest(body);
+  const objectIds = await gitUtils.listObjects({
+    efs: efs,
+    dir,
+    gitDir: gitDir,
+    wants,
+    haves,
+  });
+  // Reply that we have no common history and that we need to send everything
+  yield packetLineBuffer(gitUtils.NAK_BUFFER);
+  // Send everything over in pack format
+  yield* generatePackData({
+    efs: efs,
+    dir,
+    gitDir,
+    objectIds,
+  });
+  // Send dummy progress data
+  yield packetLineBuffer(
+    gitUtils.DUMMY_PROGRESS_BUFFER,
+    gitUtils.CHANNEL_PROGRESS,
+  );
+  // Send flush
+  yield gitUtils.FLUSH_PACKET_BUFFER;
+}
+
+/**
+ * Called by `generatePackRequest` to generate the `PackFile` data lines as part of the pack response stage.
+ * Uses `isomorphic-git` to generate the `packFile` data using the provided list of `ObjectIds`.
+ * The `packFile` is chunked into the `packetLineBuffer` with the size defined by `chunkSize`.
+ *
+ */
+async function* generatePackData({
+  efs,
+  dir,
+  gitDir,
+  objectIds,
+  chunkSize = gitUtils.PACK_CHUNK_SIZE,
+}: {
+  efs: EncryptedFS;
+  dir: string;
+  gitDir: string;
+  objectIds: Array<ObjectId>;
+  chunkSize?: number;
+}): AsyncGenerator<Buffer, void, void> {
+  const packFile = await git.packObjects({
+    fs: efs,
+    dir,
+    gitdir: gitDir,
+    oids: objectIds,
+  });
+  if (packFile.packfile == null) utils.never('failed to create packFile data');
+  let packFileBuffer = Buffer.from(packFile.packfile.buffer);
+
+  // Streaming the packFile as chunks of the length specified by the `chunkSize`.
+  // Each line is formatted as a `PKT-LINE`
+  do {
+    const subBuffer = packFileBuffer.subarray(0, chunkSize);
+    packFileBuffer = packFileBuffer.subarray(chunkSize);
+    yield packetLineBuffer(subBuffer, gitUtils.CHANNEL_DATA);
+  } while (packFileBuffer.byteLength > 0);
+}
+
+export {
+  advertiseRefGenerator,
+  packetLineBuffer,
+  parsePackRequest,
+  generatePackRequest,
+  generatePackData,
+};
diff --git a/src/git/index.ts b/src/git/index.ts
index 006019213..4d4323559 100644
--- a/src/git/index.ts
+++ b/src/git/index.ts
@@ -1,3 +1,4 @@
+export * as http from './http';
 export * as utils from './utils';
 export * as types from './types';
 export * as errors from './errors';
diff --git a/src/git/types.ts b/src/git/types.ts
index a7e83b2fc..96a51c587 100644
--- a/src/git/types.ts
+++ b/src/git/types.ts
@@ -1,86 +1,43 @@
-import type { PassThrough } from 'readable-stream';
-
-type Config = {
-  line: string;
-  ref?: string;
-  peeled?: string;
-  oid?: string;
-  comment?: boolean;
-};
-
-type Refs = {
-  [key: string]: Config;
-};
-
-type SymRefs = {
-  [key: string]: string;
-};
-
-type Ack = {
-  oid: string;
-};
-
-type Packfile = {
-  [key: string]: any;
-};
-
-type Identity = {
-  name: string;
-  email: string;
-  timestamp: number;
-  timezoneOffset: number;
-};
-
-type Pack = {
-  packstream: PassThrough;
-  shallows: Set;
-  unshallows: Set;
-  acks: Array;
-};
-
-type PackIndex = {
-  hashes: string[];
-  offsets: Map;
-  packfileSha: string;
-  getExternalRefDelta?: (
-    oid: string,
-  ) => Promise;
-  pack?: Buffer;
-};
-
-type RawObject = {
-  oid: string;
-  type: 'blob' | 'tree' | 'commit' | 'tag';
-  format: 'content';
-  object: Buffer | string | Uint8Array;
-  source?: string | undefined;
-};
-
-type WrappedObject = {
-  oid: string;
-  type: 'wrapped';
-  format: 'wrapped';
-  object: Buffer | string | Uint8Array;
-  source?: string | undefined;
-};
-
-type
DeflatedObject = {
-  oid: string;
-  type: 'deflated';
-  format: 'deflated';
-  object: Buffer | string | Uint8Array;
-  source?: string | undefined;
-};
+/**
+ * A hash referring to a git object.
+ * Has the format of a 40-digit hex number `40*(HEXDIGIT)`.
+ */
+type ObjectId = string;
+/**
+ * A reference is a branch name or path. There are special references such as `HEAD`.
+ */
+type Reference = string;
+
+/**
+ * An array of `ObjectId`s
+ */
+type ObjectIdList = Array<ObjectId>;
+type Capability = string;
+type CapabilityList = Array<Capability>;
+const objectTypes = ['blob', 'tree', 'commit', 'tag'] as const;
+/**
+ * The git object type.
+ * Commits point to a point in history.
+ * Tags point to a commit.
+ * Trees point to other objects forming the backbone of the graph.
+ * Blobs are collections of data and file contents.
+ */
+type ObjectType = (typeof objectTypes)[number];
+const requestTypes = ['want', 'have', 'SEPARATOR', 'done'] as const;
+/**
+ * The type of request line that was parsed.
+ * Want refers to an objectId the client wants from the server.
+ */
+type RequestType = (typeof requestTypes)[number];
export type {
-  Refs,
-  SymRefs,
-  Ack,
-  Packfile,
-  Identity,
-  Pack,
-  PackIndex,
-  RawObject,
-  WrappedObject,
-  DeflatedObject,
+  ObjectId,
+  Reference,
+  ObjectIdList,
+  Capability,
+  CapabilityList,
+  ObjectType,
+  RequestType,
};
+
+export { objectTypes, requestTypes };
diff --git a/src/git/utils.ts b/src/git/utils.ts
index 65889b9d2..8323f4a1f 100644
--- a/src/git/utils.ts
+++ b/src/git/utils.ts
@@ -1,1531 +1,317 @@
import type {
-  Ack,
-  DeflatedObject,
-  Identity,
-  Pack,
-  PackIndex,
-  RawObject,
-  Refs,
-  SymRefs,
-  WrappedObject,
+  Capability,
+  CapabilityList,
+  ObjectId,
+  ObjectIdList,
+  ObjectType,
+  Reference,
+  RequestType,
} from './types';
-import type {
-  CommitObject,
-  ReadCommitResult,
-  TreeEntry,
-  TreeObject,
-} from 'isomorphic-git';
import type { EncryptedFS } from 'encryptedfs';
-import path from 'path';
-import pako from 'pako';
-import Hash from 'sha.js/sha1';
-import { PassThrough } from 'readable-stream';
-import createHash from 'sha.js';
-import { errors as gitErrors } from './';
-import * as vaultsUtils from '../vaults/utils';
+import git from 'isomorphic-git';
+import { requestTypes } from './types';
+import * as utils from '../utils';
+import * as validationErrors from '../validation/errors';
+
+// Constants
+// Total number of bytes per pack line minus the 4 size bytes and 1 channel byte
+const PACK_CHUNK_SIZE = 65520 - 4 - 1;
+// Ref identifier for the HEAD commit
+const HEAD_REFERENCE = 'HEAD';
+// Used to construct the full path for head references
+const REFERENCES_STRING = 'refs/heads/';
+// Used to specify the side-band with 3 channels: data, progress and error
+const SIDE_BAND_64_CAPABILITY = 'side-band-64k';
+// Specifies the agent name; only used for logging output by the client
+const AGENT_CAPABILITY = 'agent=git/isomorphic-git@1.8.1';
+// Space separator
+const SPACE_STRING = ' ';
+// Specifies the side-band channel a pkt-line is sent over
+const CHANNEL_DATA = 1;
+const CHANNEL_PROGRESS = 2;
+const CHANNEL_ERROR = 3;
+const BUFFER_FORMAT = 'utf-8';
+// Initial string sent when doing a smart HTTP discovery request
+const REFERENCE_DISCOVERY_HEADER = Buffer.from(
+  '# service=git-upload-pack\n',
+  BUFFER_FORMAT,
);
+// NUL = %x00
+const NULL_BUFFER = Buffer.from('\0', BUFFER_FORMAT);
+// LF
+const LINE_FEED_BUFFER = Buffer.from('\n', BUFFER_FORMAT);
+// Zero-id = 40*"0"
+const ZERO_ID_BUFFER = Buffer.from('0'.repeat(40), BUFFER_FORMAT);
+// Magic string used when no refs are provided
+const
EMPTY_LIST_CAPABILITIES_BUFFER = Buffer.from( + 'capabilities^{}', + BUFFER_FORMAT, +); +// SP +const SPACE_BUFFER = Buffer.from(SPACE_STRING, BUFFER_FORMAT); +// Flush-pkt = "0000", +// used to indicate a special step or end of the stream. +// This will not be padded with the `PKT-LINE` delimiter. In essence, it's a special delimiter +// since a 0-len line would include the 4 bytes `0004` length delimiter which is explicitly not +// allowed. +const FLUSH_PACKET_BUFFER = Buffer.from('0000', BUFFER_FORMAT); +// Used to indicate no common commits during ref negotiation phase. +const NAK_BUFFER = Buffer.from('NAK\n', BUFFER_FORMAT); +// Used to provide some progress information on `channelProgress`, not sure if it's actually required +const DUMMY_PROGRESS_BUFFER = Buffer.from('progress is at 50%', BUFFER_FORMAT); + +// Functions /** - * List of paths to check for a specific ref. - * @param ref Reference string + * This will generate references and the objects they point to as a tuple. + * `HEAD` is always yielded first along with all branches. */ -const refpaths = (ref: string) => [ - `${ref}`, - `refs/${ref}`, - `refs/tags/${ref}`, - `refs/heads/${ref}`, - `refs/remotes/${ref}`, - `refs/remotes/${ref}/HEAD`, -]; - -const types = { - commit: 0b0010000, - tree: 0b0100000, - blob: 0b0110000, - tag: 0b1000000, - ofs_delta: 0b1100000, - ref_delta: 0b1110000, -}; - -// @see https://git-scm.com/docs/gitrepository-layout -const GIT_FILES = ['config', 'description', 'index', 'shallow', 'commondir']; - -/** - * Converts a buffer into an iterator expected by isomorphic git. - * @param data Data to be turned into an iterator - */ -function iteratorFromData(data: Uint8Array) { - let ended = false; - return { - async next() { - if (ended) { - return { done: true }; - } else { - ended = true; - return { value: data, done: false }; - } - }, - }; -} - -function createGitPacketLine(line: string): string { - const hexPrefix = (4 + line.length).toString(16); - return Array(4 - hexPrefix.length + 1).join('0') + hexPrefix + line; -} - -async function writeRefsAdResponse({ - capabilities, - refs, - symrefs, +async function* listReferencesGenerator({ + efs, + dir, + gitDir, }: { - capabilities: string[]; - refs: Refs; - symrefs: SymRefs; -}): Promise> { - const stream: Buffer[] = []; - // Compose capabilities string - let syms = ''; - for (const [key, value] of Object.entries(symrefs)) { - syms += `symref=${key}:${value} `; - } - let caps = `\x00${[...capabilities].join( - ' ', - )} ${syms}agent=git/isomorphic-git@1.8.1`; - // Note: In the edge case of a brand new repo, zero refs (and zero capabilities) - // are returned. - for (const [key, value] of Object.entries(refs)) { - stream.push(encode(`${value} ${key}${caps}\n`)); - // Stream.push(encode(`${value} ${a}\n`)); - caps = ''; - } - stream.push(Buffer.from('0000', 'utf8')); - return stream; -} - -/** - * Returns the hex encoded format of the input string - */ -function encode(line: string | Buffer): Buffer { - if (typeof line === 'string') { - line = Buffer.from(line); - } - const length = line.length + 4; - const s = length.toString(16); - const hexLength = '0'.repeat(4 - s.length) + s; - return Buffer.concat([Buffer.from(hexLength, 'utf8'), line]); -} - -function compareRefNames(refa: string, refb: string): number { - // https://stackoverflow.com/a/40355107/2168416 - const _a = refa.replace(/\^\{\}$/, ''); - const _b = refb.replace(/\^\{\}$/, ''); - const tmp = -(_a < _b) || +(_a > _b); - if (tmp === 0) { - return refa.endsWith('^{}') ? 
1 : -1;
-  }
-  return tmp;
-}
-
-/**
- * Parses the packed-refs file.
- * @param text - contents of the packed refs file.
- */
-function textToPackedRefs(text: string): Refs {
-  const refs: Refs = {};
-  if (text) {
-    let key: string;
-    text
-      .trim()
-      .split('\n')
-      .map((line) => {
-        if (/^\s*#/.test(line)) {
-          return { line: line, comment: true };
-        }
-        const i = line.indexOf(' ');
-        if (line.startsWith('^')) {
-          // This is a oid for the commit associated with the annotated tag immediately preceding this line.
-          // Trim off the '^'
-          const value = line.slice(1);
-          // The tagname^{} syntax is based on the output of `git show-ref --tags -d`
-          this.refs[key + '^{}'] = value;
-          return { line: line, ref: key, peeled: value };
-        } else {
-          // This is an oid followed by the ref name
-          const value = line.slice(0, i);
-          key = line.slice(i + 1);
-          this.refs[key] = value;
-          return { line: line, ref: key, oid: value };
-        }
+  efs: EncryptedFS;
+  dir: string;
+  gitDir: string;
+}): AsyncGenerator<[Reference, ObjectId], void, void> {
+  const refs: Array<[string, Promise<string>]> = await git
+    .listBranches({
+      fs: efs,
+      dir,
+      gitdir: gitDir,
+    })
+    .then((refs) => {
+      return refs.map((ref) => {
+        return [
+          `${REFERENCES_STRING}${ref}`,
+          git.resolveRef({ fs: efs, dir, gitdir: gitDir, ref: ref }),
+        ];
+      });
-  }
-  return refs;
-}
-
-/**
- * Reads and parses the packed-refs file.
- * @param fs Filesystem implementation
- * @param gitdir Git '.git' directory
- */
-async function packedRefs(fs: EncryptedFS, gitdir: string): Promise {
-  let text: string | Buffer = '# pack-refs with: peeled fully-peeled sorted';
-  try {
-    text = await fs.promises.readFile(path.join(gitdir, 'packed-refs'), {
-      encoding: 'utf8',
    });
-  } catch (err) {
-    if (err.code !== 'ENOENT') throw err;
-    // If no file then ignore and return default.
-  }
-  return textToPackedRefs(text!.toString());
-}
-
-/**
- * Obtains a list of all refs by recursively reading the FS.
- * @param fs Filesystem implementation
- * @param gitdir Git '.git' directory
- * @param filepath Path to start listing from.
- */
-async function listRefs(
-  fs: EncryptedFS,
-  gitdir: string,
-  filepath: string,
-): Promise {
-  const packedMap = packedRefs(fs, gitdir);
-  let files: string[] = [];
-  try {
-    for await (const file of vaultsUtils.readDirRecursively(
-      fs,
-      path.join(gitdir, filepath),
-    )) {
-      files.push(file);
-    }
-    files = files.map((x) => x.replace(path.join(gitdir, filepath, '/'), ''));
-  } catch (err) {
-    files = [];
-  }
-  for await (let key of Object.keys(packedMap)) {
-    // Filter by prefix
-    if (key.startsWith(filepath)) {
-      // Remove prefix
-      key = key.replace(filepath + '/', '');
-      // Don't include duplicates; the loose files have precedence anyway
-      if (!files.includes(key)) {
-        files.push(key);
-      }
-    }
-  }
-  // Since we just appended things onto an array, we need to sort them now
-  files.sort(compareRefNames);
-  return files;
-}
-
-/**
- * Resolves a ref to it's sha hash by walking the fs and packed refs.
- * @param fs Filesystem implementation
- * @param dir Git working directory
- * @param gitdir Git '.git' directory
- * @param ref Ref we wish to resolve.
- * @param depth How deep to search.
- * @returns {String} the resolved sha hash.
- */
-async function resolve({
-  fs,
-  dir = '.',
-  gitdir = '.git',
-  ref,
-  depth,
-}: {
-  fs: EncryptedFS;
-  dir?: string;
-  gitdir?: string;
-  ref: string;
-  depth?: number;
-}): Promise {
-  if (depth !== undefined) {
-    depth--;
-    if (depth === -1) {
-      return ref;
-    }
-  }
-  // Is it a ref pointer?
- if (ref.startsWith('ref: ')) { - ref = ref.slice('ref: '.length); - return resolve({ fs, dir, gitdir, ref, depth }); - } - // Is it a complete and valid SHA? - if (ref.length === 40 && /[0-9a-f]{40}/.test(ref)) { - return ref; - } - // We need to alternate between the file system and the packed-refs - const packedMap = await packedRefs(fs, gitdir); - // Look in all the proper paths, in this order - const allpaths = refpaths(ref).filter((p) => !GIT_FILES.includes(p)); // Exclude git system files (#709) - for (const ref of allpaths) { - let sha; - try { - sha = - ( - await fs.promises.readFile(path.join(gitdir, ref), { - encoding: 'utf8', - }) - ).toString() || packedMap[ref].line; - } catch (err) { - if (err.code === 'ENOENT') { - throw new gitErrors.ErrorGitUndefinedRefs(`Ref ${ref} cannot be found`); - } - } - if (sha != null) { - return resolve({ fs, dir, gitdir, ref: sha.trim(), depth }); - } - } - throw new gitErrors.ErrorGitUndefinedRefs(`ref ${ref} corrupted`); -} - -/** - * Obtains a list of all the refs in the repository and formats it. - * @param fs Filesystem implementation - * @param dir Git working directory - * @param gitdir Git '.git' directory - * @param advertiseRefs Bool to specify if we want to advertise the refs. - */ -async function uploadPack({ - fs, - dir = '.', - gitdir = '.git', - advertiseRefs = false, -}: { - fs: EncryptedFS; - dir?: string; - gitdir?: string; - advertiseRefs: boolean; -}): Promise> { - try { - if (advertiseRefs) { - const capabilities = ['side-band-64k']; - let keys = await listRefs(fs, gitdir, 'refs'); - keys = keys.map((ref) => path.join('refs', ref)); - const refs = {}; - keys.unshift('HEAD'); - for (const key of keys) { - refs[key] = await resolve({ fs, dir, gitdir, ref: key }); - } - const symrefs = {}; - symrefs['HEAD'] = await resolve({ - fs, - dir, - gitdir, - ref: 'HEAD', - depth: 2, - }); - const write = { - capabilities: capabilities, - refs: refs, - symrefs: symrefs, - }; - return writeRefsAdResponse(write); - } else { - return []; - } - } catch (err) { - err.caller = 'git.uploadPack'; - throw err; + // HEAD always comes first + const resolvedHead = await git.resolveRef({ + fs: efs, + dir, + gitdir: gitDir, + ref: HEAD_REFERENCE, + }); + yield [HEAD_REFERENCE, resolvedHead]; + for (const [key, refP] of refs) { + yield [key, await refP]; } } /** - * This when given a list of refs works out the missing commits and sends them over as a stream. - * @param fs Filesystem implementation - * @param dir Git working directory - * @param gitdir Git '.git' directory - * @param refs List of refs we want. - * @param depth How deep we want to search commits for. - * @param haves list of oids we already have and can be excluded from the stream. + * Reads the provided reference and formats it as a `symref` capability. + * This capability is used to indicate to the client where a certain ref points to if it doesn't point directly to an + * object. For now only used to indicate what `HEAD` points to. */ -async function packObjects({ - fs, - dir = '.', - gitdir = '.git', - refs, - depth = undefined, - haves = undefined, +async function referenceCapability({ + efs, + dir, + gitDir, + reference, }: { - fs: EncryptedFS; + efs: EncryptedFS; dir: string; - gitdir: string; - refs: string[]; - depth?: number; - haves?: string[]; -}): Promise { - const oids = new Set(); // List of oids for commits we wish to send. - const shallows = new Set(); - const unshallows = new Set(); - const acks: Ack[] = []; // A list of the commits that were found but already had. 
- haves = haves ? haves : []; // The list of commits we already have.
-  const since = undefined;
-  // For each desired ref.
-  for (const ref of refs) {
-    // Obtain a list of the relevant commits
-    const commits = await log({ fs, dir, gitdir, ref, depth, since });
-    const oldshallows: string[] = []; // Never actually updated so I have no idea.
-    for (let i = 0; i < commits.length; i++) {
-      const commit = commits[i];
-      if (haves.includes(commit.oid)) {
-        acks.push({
-          oid: ref,
-        });
-        break;
-      }
-      oids.add(commit.oid);
-      if (i === commits.length - 1) {
-        if (
-          !oldshallows.includes(commit.oid) &&
-          (depth !== undefined || since !== undefined)
-        ) {
-          shallows.add(commit.oid);
-        }
-      } else if (oldshallows.includes(commit.oid)) {
-        unshallows.add(commit.oid);
-      }
-    }
+  gitDir: string;
+  reference: Reference;
+}): Promise<Capability> {
+  try {
+    const resolvedHead = await git.resolveRef({
+      fs: efs,
+      dir,
+      gitdir: gitDir,
+      ref: reference,
+      depth: 2,
+    });
+    return `symref=${reference}:${resolvedHead}`;
+  } catch (e) {
+    if (e.code === 'ENOENT') throw e;
+    return '';
+  }
}
-  // Getting all of the Oids within the tree of the desired Oids.
-  const objects = await listObjects({
-    fs,
-    dir,
-    gitdir,
-    oids: Array.from(oids),
-  });
-  const packstream = new PassThrough();
-  // Packing, gzipping and returning a stream of all the desired data through packstream.
-  await pack({ fs, dir, gitdir, oids: [...objects], outputStream: packstream });
-  return { packstream, shallows, unshallows, acks };
-}

/**
- * Walks the git objects and returns a list of blobs, commits and trees.
- * @param fs Filesystem implementation
- * @param dir Git working directory
- * @param gitdir Git '.git' directory
- * @param oids List of starting oids.
+ * Performs a walk of the git data structure, listing all objects found by the walk.
+ * This starts from all the objects in the `wants` list, walking across all dependent objects while skipping any
+ * objects in the `haves` list. This results in a complete list of objects that the `wants` require.
+ *
+ * Used by `generatePackRequest` to determine which objects are required in the `packFile`.
+ * The walk is performed recursively and concurrently using promises.
+ * Inspecting the git data structure objects is done using `isomorphic-git`.
 */
async function listObjects({
-  fs,
-  dir = '.',
-  gitdir = '.git',
-  oids,
+  efs,
+  dir,
+  gitDir,
+  wants,
+  haves,
}: {
-  fs: EncryptedFS;
+  efs: EncryptedFS;
  dir: string;
-  gitdir: string;
-  oids: string[];
-}): Promise> {
+  gitDir: string;
+  wants: ObjectIdList;
+  haves: ObjectIdList;
+}): Promise<ObjectIdList> {
  const commits = new Set();
  const trees = new Set();
  const blobs = new Set();
+  const tags: Set<ObjectId> = new Set();
+  const havesSet: Set<ObjectId> = new Set(haves);

-  // We don't do the purest simplest recursion, because we can
-  // avoid reading Blob objects entirely since the Tree objects
-  // tell us which oids are Blobs and which are Trees. And we
-  // do not need to recurse through commit parents.
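+  // The walk below branches on object type: a commit recurses into its tree
+  // and parents, a tree into its entries, a tag into its target; blobs are
+  // leaves. Any object already in `havesSet` is skipped immediately.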
- async function walk(oid: string): Promise {
-    const gitObject = await readObject({ fs, dir, gitdir, oid });
-    if (gitObject.type === 'commit') {
-      commits.add(oid);
-      const commit = commitFrom(Buffer.from(gitObject.object));
-      const tree = parseHeaders(commit).tree;
-      await walk(tree);
-    } else if (gitObject.type === 'tree') {
-      trees.add(oid);
-      const tree = treeFrom(gitObject.object as Uint8Array);
-      for (const entry of tree) {
-        if (entry.type === 'blob') {
-          blobs.add(entry.oid);
+  async function walk(objectId: ObjectId, type: ObjectType): Promise<void> {
+    // If object was listed as a have then we don't need to walk over it
+    if (havesSet.has(objectId)) return;
+    switch (type) {
+      case 'commit':
+        {
+          commits.add(objectId);
+          const readCommitResult = await git.readCommit({
+            fs: efs,
+            dir,
+            gitdir: gitDir,
+            oid: objectId,
+          });
+          const tree = readCommitResult.commit.tree;
+          const parents = readCommitResult.commit.parent;
+          await Promise.all([
+            walk(tree, 'tree'),
+            ...parents.map((parent) => walk(parent, 'commit')),
+          ]);
+        }
+        return;
+      case 'tree':
+        {
+          trees.add(objectId);
+          const readTreeResult = await git.readTree({
+            fs: efs,
+            dir,
+            gitdir: gitDir,
+            oid: objectId,
+          });
+          const walkPs: Array<Promise<void>> = [];
+          for (const { oid, type } of readTreeResult.tree) {
+            walkPs.push(walk(oid, type));
+          }
+          await Promise.all(walkPs);
+        }
+        return;
+      case 'blob':
+        {
+          blobs.add(objectId);
        }
-        // Only recurse for trees
-        if (entry.type === 'tree') {
-          await walk(entry.oid);
+        return;
+      case 'tag':
+        {
+          tags.add(objectId);
+          const readTagResult = await git.readTag({
+            fs: efs,
+            dir,
+            gitdir: gitDir,
+            oid: objectId,
+          });
+          const { object, type } = readTagResult.tag;
+          await walk(object, type);
        }
-      }
+        return;
+      default:
+        utils.never();
    }
  }
  // Let's go walking!
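+  // Start a walk from every wanted object id concurrently; `Promise.all`
+  // waits for the whole traversal and surfaces the first failure.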
- for (const oid of oids) {
-    await walk(oid);
-  }
-  return [...commits, ...trees, ...blobs];
-}
-
-function treeFrom(entries: Uint8Array): TreeObject {
-  let entriesa: TreeObject = [];
-  if (Buffer.isBuffer(entries)) {
-    entriesa = parseBuffer(entries);
-  } else if (Array.isArray(entries)) {
-    entriesa = entries.map(nudgeIntoShape);
-  } else {
-    throw new gitErrors.ErrorGitReadObject(
-      'invalid type passed to GitTree constructor',
-    );
-  }
-  return entriesa;
-}
-
-function nudgeIntoShape(entry: TreeEntry): TreeEntry {
-  // It seems strange that this is needed, works without
-  // if (!entry.oid && entry.sha) {
-  //   entry.oid = entry.sha; // Github
-  // }
-  entry.mode = limitModeToAllowed(entry.mode); // Index
-  if (!entry.type) {
-    entry.type = 'blob'; // Index
+  const walkPs: Array<Promise<void>> = [];
+  for (const oid of wants) {
+    walkPs.push(walk(oid, 'commit'));
  }
-  return entry;
-}
-
-function limitModeToAllowed(mode: string | number): string {
-  if (typeof mode === 'number') {
-    mode = mode.toString(8);
-  }
-  // Tree
-  if (mode.match(/^0?4.*/)) return '40000'; // Directory
-  if (mode.match(/^1006.*/)) return '100644'; // Regular non-executable file
-  if (mode.match(/^1007.*/)) return '100755'; // Regular executable file
-  if (mode.match(/^120.*/)) return '120000'; // Symbolic link
-  if (mode.match(/^160.*/)) return '160000'; // Commit (git submodule reference)
-  throw new gitErrors.ErrorGitUndefinedType(
-    `Could not understand file mode: ${mode}`,
-  );
-}
-
-function parseBuffer(buffer: Buffer): TreeObject {
-  const _entries: TreeObject = [];
-  let cursor = 0;
-  while (cursor < buffer.length) {
-    const space = buffer.indexOf(32, cursor);
-    if (space === -1) {
-      throw new gitErrors.ErrorGitReadObject(
-        `GitTree: Error parsing buffer at byte location ${cursor}: Could not find the next space character.`,
-      );
-    }
-    const nullchar = buffer.indexOf(0, cursor);
-    if (nullchar === -1) {
-      throw new gitErrors.ErrorGitReadObject(
-        `GitTree: Error parsing buffer at byte location ${cursor}: Could not find the next null character.`,
-      );
-    }
-    let mode = buffer.slice(cursor, space).toString('utf8');
-    if (mode === '40000') mode = '040000'; // Makes it line up neater in printed output
-    const type = mode === '040000' ? 'tree' : 'blob';
-    const path = buffer.slice(space + 1, nullchar).toString('utf8');
-    const oid = buffer.slice(nullchar + 1, nullchar + 21).toString('hex');
-    cursor = nullchar + 21;
-    _entries.push({ mode, path, oid, type });
-  }
-  return _entries;
-}
+  await Promise.all(walkPs);
+  return [...commits, ...trees, ...blobs, ...tags];
+}

/**
- * Returns a commit lg for a given ref
- const commits: ReadCommitResult[] = []; - const oid = await resolve({ fs, dir, gitdir, ref }); - const tips = [await logCommit({ fs, dir, gitdir, oid, signing })]; - - // eslint-disable-next-line - while (true) { - const commitResult = tips.pop(); - if (commitResult == null) { - throw new gitErrors.ErrorGitReadObject('Commit history invalid'); - } - const commit = commitResult.commit; - - // Stop the log if we've hit the age limit - if ( - sinceTimestamp !== undefined && - commit.committer.timestamp <= sinceTimestamp - ) { - break; - } - - commits.push(commitResult); - - // Stop the loop if we have enough commits now. - if (depth !== undefined && commits.length === depth) break; - - // Add the parents of this commit to the queue - // Note: for the case of a commit with no parents, it will concat an empty array, having no net effect. - for (const oid of commit.parent) { - const commitResult1 = await logCommit({ - fs, - dir, - gitdir, - oid, - signing, - }); - if (!tips.map((commit) => commit.oid).includes(commitResult1.oid)) { - tips.push(commitResult1); - } - } - - // Stop the loop if there are no more commit parents - if (tips.length === 0) break; - - // Process tips in order by age - tips.sort(compareAge); - } - return commits; - } catch (err) { - err.caller = 'git.log'; - throw err; - } -} - -function compareAge(a: ReadCommitResult, b: ReadCommitResult): number { - return a.commit.committer.timestamp - b.commit.committer.timestamp; -} - -async function logCommit({ - fs, - dir = '.', - gitdir = '.git', - oid, - signing, -}: { - fs: EncryptedFS; - dir: string; - gitdir: string; - oid: string; - signing: boolean; -}): Promise { - const gitObject = await readObject({ fs, dir, gitdir, oid }); - if (gitObject.type !== 'commit') { - throw new gitErrors.ErrorGitUndefinedType( - `Expected type to be commit, but instead found ${gitObject.type}`, - ); - } - const commit = commitFrom(gitObject.object as Buffer | string); - const payload = signing ? 
withoutSignature(commit) : ''; - const result = { oid: oid, commit: parse(commit), payload: payload }; - return result; -} - -function withoutSignature(commit: string): string { - const commita = normalize(commit); - if (commita.indexOf('\ngpgsig') === -1) return commita; - const headers = commita.slice(0, commita.indexOf('\ngpgsig')); - const message = commita.slice( - commita.indexOf('-----END PGP SIGNATURE-----\n') + - '-----END PGP SIGNATURE-----\n'.length, - ); - return normalize(headers + '\n' + message); -} -function justMessage(commit: string): string { - return normalize(commit.slice(commit.indexOf('\n\n') + 2)); -} - -function parse(commit: string): CommitObject { - return { message: justMessage(commit), ...parseHeaders(commit) }; -} - -function render(obj: CommitObject): string { - return renderHeaders(obj) + '\n' + normalize(obj.message); -} - -function justHeaders(commit: string): string { - return commit.slice(0, commit.indexOf('\n\n')); -} - -function parseHeaders(commit: string): { - parent: string[]; - tree: string; - author: Identity; - committer: Identity; -} { - const headers = justHeaders(commit).split('\n'); - const hs: string[] = []; - for (const h of headers) { - if (h[0] === ' ') { - // Combine with previous header (without space indent) - hs[hs.length - 1] += '\n' + h.slice(1); - } else { - hs.push(h); - } - } - const parent: string[] = []; - const obj: { - parent: string[]; - tree: string; - author: Identity; - committer: Identity; - } = { - parent: parent, - tree: '', - author: { - name: '', - email: '', - timestamp: 0, - timezoneOffset: 0, - }, - committer: { - name: '', - email: '', - timestamp: 0, - timezoneOffset: 0, - }, - }; - for (const h of hs) { - const key = h.slice(0, h.indexOf(' ')); - const value = h.slice(h.indexOf(' ') + 1); - if (key === 'author' || key === 'commiter') { - obj[key] = parseAuthor(value); - } else if (Array.isArray(obj[key])) { - obj[key].push(value); - } else { - obj[key] = value; - } - } - return { ...obj }; -} - -function parseAuthor(author: string): Identity { - const identity = author.match(new RegExp(/^(.*) <(.*)> (.*) (.*)$/)); - let name: string, email: string, timestamp: number, offset: number; - if (identity != null) { - name = identity[1]; - email = identity[2]; - timestamp = Number(identity[3]); - offset = parseTimezoneOffset(identity[4]); - } else { - throw new gitErrors.ErrorGitReadObject('Invalid Author'); - } - return { - name: name, - email: email, - timestamp: timestamp, - timezoneOffset: offset, - }; -} - -function parseTimezoneOffset(offset: string): number { - const matches = offset.match(/(\+|-)(\d\d)(\d\d)/); - if (matches == null) { - throw new gitErrors.ErrorGitReadObject( - 'No timezone found on commit object', - ); - } - const sign = matches[1]; - const hours = matches[2]; - const minutes = matches[3]; - const mins = (sign === '+' ? 1 : -1) * (Number(hours) * 60 + Number(minutes)); - return mins === 0 ? 
mins : -mins;
-}
-
-function normalize(str: string): string {
-  // Remove all
-  str = str.replace(/\r/g, '');
-  // No extra newlines up front
-  str = str.replace(/^\n+/, '');
-  // And a single newline at the end
-  str = str.replace(/\n+$/, '') + '\n';
-  return str;
-}
-
-function indent(str: string): string {
-  return (
-    str
-      .trim()
-      .split('\n')
-      .map((x) => ' ' + x)
-      .join('\n') + '\n'
-  );
-}
-
-function renderHeaders(obj: CommitObject): string {
-  let headers = '';
-  if (obj.tree) {
-    headers += `tree ${obj.tree}\n`;
-  } else {
-    headers += `tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904\n`; // The null tree
-  }
-  if (obj.parent) {
-    for (const p of obj.parent) {
-      headers += `parent ${p}\n`;
-    }
-  }
-  const author = obj.author;
-  headers += `author ${author.name} <${author.email}> ${
-    author.timestamp
-  } ${formatTimezoneOffset(author.timezoneOffset)}\n`;
-  const committer = obj.committer || obj.author;
-  headers += `committer ${committer.name} <${committer.email}> ${
-    committer.timestamp
-  } ${formatTimezoneOffset(committer.timezoneOffset)}\n`;
-  if (obj.gpgsig) {
-    headers += 'gpgsig' + indent(obj.gpgsig);
-  }
-  return headers;
-}
-
-function formatTimezoneOffset(minutes: number): string {
-  const sign = simpleSign(minutes === 0 ? minutes : -minutes);
-  minutes = Math.abs(minutes);
-  const hours = Math.floor(minutes / 60);
-  minutes -= hours * 60;
-  let strHours = String(hours);
-  let strMinutes = String(minutes);
-  if (strHours.length < 2) strHours = '0' + strHours;
-  if (strMinutes.length < 2) strMinutes = '0' + strMinutes;
-  return (sign === -1 ? '-' : '+') + strHours + strMinutes;
-}
-
-function simpleSign(n: number): number {
-  return Math.sign(n) || (Object.is(n, -0) ? -1 : 1);
-}
-
-function commitFrom(commit: string | Buffer): string {
-  let commitRet: string;
-  if (typeof commit === 'string') {
-    commitRet = commit;
-  } else if (Buffer.isBuffer(commit)) {
-    commitRet = commit.toString('utf8');
-  } else if (typeof commit === 'object') {
-    commitRet = render(commit);
-  } else {
-    throw new gitErrors.ErrorGitReadObject(
-      'invalid type passed to GitCommit constructor',
-    );
-  }
-  return commitRet;
-}
+/**
+ * Parses a want/have line from the ref negotiation phase.
+ */
+function parseRequestLine(
+  workingBuffer: Buffer,
+): [RequestType, ObjectId, CapabilityList, Buffer] | undefined {
+  if (workingBuffer.byteLength === 0) return;
+  const lengthBuffer = workingBuffer.subarray(0, 4).toString();
+  if (!/^[0-9a-f]{4}$/.test(lengthBuffer)) {
+    throw new validationErrors.ErrorParse(
+      'expected a 4-length hex number length indicator',
+    );
+  }
+  const length = parseInt(lengthBuffer, 16);
+  if (length > workingBuffer.byteLength) return;
+  if (length === 0) return ['SEPARATOR', '', [], workingBuffer.subarray(4)];
+  const rest = workingBuffer.subarray(length);
+  const lineBuffer = workingBuffer.subarray(4, length);
+  const lineString = lineBuffer.toString().trimEnd();
+  const [requestType, id, ...capabilities] = lineString.split(SPACE_STRING);
+  assertRequestType(requestType);
+  if (id != null) assertObjectId(id);
+  return [requestType, id ??
'', capabilities, rest]; } -async function readObject({ - fs, - dir, - gitdir, - oid, - format, - encoding, -}: { - fs: EncryptedFS; - dir: string; - gitdir: string; - oid: string; - format?: 'parsed' | 'content'; - encoding?: BufferEncoding; -}): Promise; -async function readObject({ - fs, - dir, - gitdir, - oid, - format, - encoding, -}: { - fs: EncryptedFS; - dir: string; - gitdir: string; - oid: string; - format: 'deflated'; - encoding?: BufferEncoding; -}): Promise; -async function readObject({ - fs, - dir, - gitdir, - oid, - format, - encoding, -}: { - fs: EncryptedFS; - dir: string; - gitdir: string; - oid: string; - format: 'wrapped'; - encoding?: BufferEncoding; -}): Promise; -async function readObject({ - fs, - dir = '.', - gitdir = '.git', - oid, - format = 'parsed', - encoding, -}: { - fs: EncryptedFS; - dir: string; - gitdir: string; - oid: string; - format?: 'wrapped' | 'parsed' | 'deflated' | 'content'; - encoding?: BufferEncoding; -}): Promise { - const _format = format === 'parsed' ? 'content' : format; - // Curry the current read method so that the packfile un-deltification - // process can acquire external ref-deltas. - const getExternalRefDelta = (oid: string) => - readObject({ fs, dir, gitdir, oid }); - let result; - // Empty tree - hard-coded so we can use it as a shorthand. - // Note: I think the canonical git implementation must do this too because - // `git cat-file -t 4b825dc642cb6eb9a060e54bf8d69288fbee4904` prints "tree" even in empty repos. - if (oid === '4b825dc642cb6eb9a060e54bf8d69288fbee4904') { - result = { format: 'wrapped', object: Buffer.from(`tree 0\x00`) }; - } - const source = path.join('objects', oid.slice(0, 2), oid.slice(2)); - // Look for it in the loose object directory - try { - result = { - object: await fs.promises.readFile(path.join(gitdir, source)), - format: 'deflated', - source: source, - }; - } catch (err) { - if (err.code === 'ENOENT') { - // Object was not in the loose object directory - } - } - // Check to see if it's in a packfile. - if (result == null) { - // Iterate through all the .pack files - const list = await fs.promises.readdir( - path.join(gitdir, 'objects', 'pack'), - ); - let stringList = list.map((x) => { - return x.toString(); - }); - stringList = stringList.filter((x: string) => x.endsWith('.idx')); - for (const filename of stringList) { - const indexFile = path.join(gitdir, 'objects', 'pack', filename); - const idx = await fs.promises.readFile(indexFile); - const p = fromIdx(Buffer.from(idx), getExternalRefDelta); - if (p == null) { - break; - } - // If the packfile DOES have the oid we're looking for... 
- if (p.offsets.has(oid)) {
-        // Make sure the packfile is loaded in memory
-        if (!p.pack) {
-          const packFile = indexFile.replace(/idx$/, 'pack');
-          const pack = await fs.promises.readFile(packFile);
-          p.pack = Buffer.from(pack);
-        }
-        // Get the resolved git object from the packfile
-        result = await readPack(p, oid);
-        result.format = 'content';
-        result.source = path.join(
-          'objects',
-          'pack',
-          filename.replace(/idx$/, 'pack'),
-        );
-      }
-    }
-  }
-  // If the object has not been found yet throw an error
-  if (result == null) {
-    throw new gitErrors.ErrorGitReadObject(`Failed to read object ${oid}`);
-  }
-  if (format === 'deflated') {
-    result.oid = oid;
-  } else if (
-    result.format === 'deflated' ||
-    result.format === 'wrapped' ||
-    result.format === 'content'
-  ) {
-    if (result.format === 'deflated') {
-      result.object = Buffer.from(pako.inflate(result.object));
-      result.format = 'wrapped';
-    }
-    if (result.format === 'wrapped') {
-      if (format === 'wrapped' && result.format === 'wrapped') {
-        return {
-          oid: oid,
-          type: 'wrapped',
-          format: result.format,
-          object: result.object,
-          source: result.source,
-        };
-      }
-      const sha = new Hash().update(result.object).digest('hex');
-      if (sha !== oid) {
-        throw new gitErrors.ErrorGitReadObject(
-          `SHA check failed! Expected ${oid}, computed ${sha}`,
-        );
-      }
-      const { type, object } = unwrap(result.object);
-      result.type = type;
-      result.object = object;
-      result.format = 'content';
-    }
-    if (result.format === 'content') {
-      if (format === 'content') {
-        return {
-          oid: oid,
-          type: result.type,
-          format: result.format,
-          object: result.object,
-          source: result.source,
-        };
-      }
-    }
-  } else {
-    throw new gitErrors.ErrorGitReadObject(
-      `Unsupported format type: ${result.format}`,
-    );
-  }
-  if (format === 'parsed') {
-    result.format = 'parsed';
-    switch (result.type) {
-      case 'commit':
-        result.object = commitFrom(result.object);
-        break;
-      case 'tree':
-        // Result.object = treeFrom(result.object).entries();
-        break;
-      case 'blob':
-        // Here we consider returning a raw Buffer as the 'content' format
-        // and returning a string as the 'parsed' format
-        if (encoding) {
-          result.object = result.object.toString(encoding);
-        } else {
-          result.object = new Uint8Array(result.object);
-          result.format = 'content';
-        }
-        break;
-      default:
-        throw new gitErrors.ErrorGitUndefinedType(
-          `Object ${result.oid} type ${result.type} not recognised`,
-        );
-    }
-  } else if (result.format === 'deflated' || result.format === 'wrapped') {
-    result.type = result.format;
-  }
-  return result;
-}
+
+// Type guards
+
-async function readPack(
-  p: PackIndex,
-  oid: string,
-): Promise {
-  const start = p.offsets.get(oid);
-  if (start == null) {
-    if (p.getExternalRefDelta) {
-      return p.getExternalRefDelta(oid);
-    } else {
-      throw new gitErrors.ErrorGitReadObject(
-        `Could not read object ${oid} from packfile`,
-      );
-    }
-  }
-  return await readSlice(p, start, oid);
+function isObjectId(objectId: unknown): objectId is ObjectId {
+  if (typeof objectId !== 'string') return false;
+  return /^[0-9a-f]{40}$/.test(objectId);
}
-async function readSlice(
-  p: PackIndex,
-  start: number,
-  oid: string,
-): Promise {
-  const types = {
-    0b0010000: 'commit',
-    0b0100000: 'tree',
-    0b0110000: 'blob',
-    0b1000000: 'tag',
-    0b1100000: 'ofs_delta',
-    0b1110000: 'ref_delta',
-  };
-  if (!p.pack) {
-    throw new gitErrors.ErrorGitReadObject(
-      'Tried to read from a GitPackIndex with no packfile loaded into memory',
+function assertObjectId(objectId: unknown): asserts objectId is ObjectId {
+  if
(!isObjectId(objectId)) { + throw new validationErrors.ErrorParse( + `\`objectId\` must be a 40-digit hex number lowercase string, received (${objectId})`, ); } - const raw = p.pack.slice(start); - const reader = new BufferCursor(raw); - const byte = reader.readUInt8(); - // Object type is encoded in bits 654 - const btype = byte & 0b1110000; - let type = types[btype]; - if (type === undefined) { - throw new gitErrors.ErrorGitUndefinedType( - 'Unrecognized type: 0b' + btype.toString(2), - ); - } - // The length encoding get complicated. - // Last four bits of length is encoded in bits 3210 - const lastFour = byte & 0b1111; - let length = lastFour; - // Whether the next byte is part of the variable-length encoded number - // is encoded in bit 7 - const multibyte = byte & 0b10000000; - if (multibyte) { - length = otherVarIntDecode(reader, lastFour); - } - let base; - let object: Buffer; - // Handle deltified objects - if (type === 'ofs_delta') { - const offset = decodeVarInt(reader); - const baseOffset = start - offset; - ({ object: base, type } = await readSlice(p, baseOffset, oid)); - } - // Handle undeltified objects - const buffer = raw.slice(reader.tell()); - object = Buffer.from(pako.inflate(buffer)); - // Assert that the object length is as expected. - if (object.byteLength !== length) { - throw new gitErrors.ErrorGitReadObject( - `Packfile told us object would have length ${length} but it had length ${object.byteLength}`, - ); - } - if (base != null) { - object = Buffer.from(applyDelta(object, base)); - } - return { oid: oid, type: type, format: 'content', object: object }; -} - -function applyDelta(delta: Buffer, source: Buffer): Buffer { - const reader = new BufferCursor(delta); - const sourceSize = readVarIntLE(reader); - - if (sourceSize !== source.byteLength) { - throw new gitErrors.ErrorGitReadObject( - `applyDelta expected source buffer to be ${sourceSize} bytes but the provided buffer was ${source.length} bytes`, - ); - } - const targetSize = readVarIntLE(reader); - let target: Buffer; - - const firstOp = readOp(reader, source); - // Speed optimization - return raw buffer if it's just single simple copy - if (firstOp.byteLength === targetSize) { - target = firstOp; - } else { - // Otherwise, allocate a fresh buffer and slices - target = Buffer.alloc(targetSize); - const writer = new BufferCursor(target); - writer.copy(firstOp); - - while (!reader.eof()) { - writer.copy(readOp(reader, source)); - } - - const tell = writer.tell(); - if (targetSize !== tell) { - throw new gitErrors.ErrorGitReadObject( - `applyDelta expected target buffer to be ${targetSize} bytes but the resulting buffer was ${tell} bytes`, - ); - } - } - return target; -} - -function readVarIntLE(reader: BufferCursor): number { - let result = 0; - let shift = 0; - let byte; - do { - byte = reader.readUInt8(); - result |= (byte & 0b01111111) << shift; - shift += 7; - } while (byte & 0b10000000); - return result; -} - -function readOp(reader: BufferCursor, source: Buffer): Buffer { - const byte = reader.readUInt8(); - const COPY = 0b10000000; - const OFFS = 0b00001111; - const SIZE = 0b01110000; - if (byte & COPY) { - // Copy consists of 4 byte offset, 3 byte size (in LE order) - const offset = readCompactLE(reader, byte & OFFS, 4); - let size = readCompactLE(reader, (byte & SIZE) >> 4, 3); - // Yup. They really did this optimization. 
- if (size === 0) size = 0x10000; - return source.slice(offset, offset + size); - } else { - // Insert - return reader.slice(byte); - } -} - -function readCompactLE( - reader: BufferCursor, - flags: number, - size: number, -): number { - let result = 0; - let shift = 0; - while (size--) { - if (flags & 0b00000001) { - result |= reader.readUInt8() << shift; - } - flags >>= 1; - shift += 8; - } - return result; -} - -function decodeVarInt(reader: BufferCursor): number { - const bytes: number[] = []; - let byte = 0; - let multibyte = 0; - do { - byte = reader.readUInt8(); - // We keep bits 6543210 - const lastSeven = byte & 0b01111111; - bytes.push(lastSeven); - // Whether the next byte is part of the variable-length encoded number - // is encoded in bit 7 - multibyte = byte & 0b10000000; - } while (multibyte); - // Now that all the bytes are in big-endian order, - // alternate shifting the bits left by 7 and OR-ing the next byte. - // And... do a weird increment-by-one thing that I don't quite understand. - return bytes.reduce((a, b) => ((a + 1) << 7) | b, -1); -} - -function otherVarIntDecode(reader: BufferCursor, startWith: number): number { - let result = startWith; - let shift = 4; - let byte; - do { - byte = reader.readUInt8(); - result |= (byte & 0b01111111) << shift; - shift += 7; - } while (byte & 0b10000000); - return result; } -function fromIdx( - idx: Buffer, - getExternalRefDelta?: ( - oid: string, - ) => Promise, -): PackIndex | undefined { - const reader = new BufferCursor(idx); - const magic = reader.slice(4).toString('hex'); - if (magic !== 'ff744f63') { - return; - } - const version = reader.readUInt32BE(); - if (version !== 2) { - throw new gitErrors.ErrorGitReadObject( - `Unable to read version ${version} packfile IDX. (Only version 2 supported)`, - ); - } - if (idx.byteLength > 2048 * 1024 * 1024) { - throw new gitErrors.ErrorGitReadObject( - `To keep implementation simple, I haven't implemented the layer 5 feature needed to support packfiles > 2GB in size.`, - ); - } - // Skip over fanout table - reader.seek(reader.tell() + 4 * 255); - // Get hashes - const size = reader.readUInt32BE(); - const hashes: string[] = []; - for (let i = 0; i < size; i++) { - const hash = reader.slice(20).toString('hex'); - hashes[i] = hash; - } - reader.seek(reader.tell() + 4 * size); - // Skip over CRCs - // Get offsets - const offsets = new Map(); - for (let i = 0; i < size; i++) { - offsets.set(hashes[i], reader.readUInt32BE()); - } - const packfileSha = reader.slice(20).toString('hex'); - return { - hashes, - offsets, - packfileSha, - getExternalRefDelta, - }; +function isRequestType(requestType: unknown): requestType is RequestType { + if (typeof requestType !== 'string') return false; + // Forcing conversion here just to do the check + return requestTypes.includes(requestType as RequestType); } -function unwrap(buffer: Buffer): { - type: string; - object: Buffer; -} { - const s = buffer.indexOf(32); // First space - const i = buffer.indexOf(0); // First null value - const type = buffer.slice(0, s).toString('utf8'); // Get type of object - // if (type !== 'commit' && type !== 'tree' && type !== 'blob') - // throw new gitErrors.ErrorGitUndefinedType( - // `Object of type ${type} not recognised`, - // ); - const length = buffer.slice(s + 1, i).toString('utf8'); // Get type of object - const actualLength = buffer.length - (i + 1); - // Verify length - if (parseInt(length) !== actualLength) { - throw new gitErrors.ErrorGitReadObject( - `Length mismatch: expected ${length} bytes but got 
${actualLength} instead.`,
+function assertRequestType(
+ requestType: unknown,
+): asserts requestType is RequestType {
+ if (!isRequestType(requestType)) {
+ throw new validationErrors.ErrorParse(
+ `\`requestType\` must be one of \`want\`, \`have\`, \`SEPARATOR\`, or \`done\`, received (${requestType})`,
 );
 }
- return {
- type: type,
- object: Buffer.from(buffer.slice(i + 1)),
- };
-}
-
-/**
- * Without getting to deep into it, it seems to be prepping and then sending all the required data through the output stream.
- * @param fs Filesystem implementation
- * @param dir Git working directory
- * @param gitdir Git '.git' directory
- * @param oids Desired Oids to be sent.
- * @param outputStream data output stream.
- */
-async function pack({
- fs,
- dir = '.',
- gitdir = '.git',
- oids,
- outputStream,
-}: {
- fs: EncryptedFS;
- dir: string;
- gitdir: string;
- oids: string[];
- outputStream: PassThrough;
-}): Promise {
- const hash = await createHash('sha1');
- function write(chunk: Buffer | string, enc?: BufferEncoding): void {
- if (enc != null) {
- outputStream.write(chunk, enc);
- } else {
- outputStream.write(chunk);
- }
- hash.update(chunk, enc);
- }
- function writeObject(object: Uint8Array, stype: string): void {
- // Object type is encoded in bits 654
- const type = types[stype];
- if (type === undefined) {
- throw new gitErrors.ErrorGitUndefinedType('Unrecognized type: ' + stype);
- }
- // The length encoding get complicated.
- let length = object.length;
- // Whether the next byte is part of the variable-length encoded number
- // is encoded in bit 7
- let multibyte = length > 0b1111 ? 0b10000000 : 0b0;
- // Last four bits of length is encoded in bits 3210
- const lastFour = length & 0b1111;
- // Discard those bits
- length = length >>> 4;
- // The first byte is then (1-bit multibyte?), (3-bit type), (4-bit least sig 4-bits of length)
- let byte: number | string = (multibyte | type | lastFour).toString(16);
- write(byte, 'hex');
- // Now we keep chopping away at length 7-bits at a time until its zero,
- // writing out the bytes in what amounts to little-endian order.
- while (multibyte) {
- multibyte = length > 0b01111111 ? 0b10000000 : 0b0;
- byte = multibyte | (length & 0b01111111);
- const unpaddedChunk = byte.toString(16);
- const paddedChunk = '0'.repeat(2 - unpaddedChunk.length) + unpaddedChunk;
- write(paddedChunk, 'hex');
- length = length >>> 7;
- }
- // Lastly, we can compress and write the object.
- write(Buffer.from(pako.deflate(object)));
- }
-
- write('PACK');
- write('00000002', 'hex');
- // Write a 4 byte (32-bit) int
- const unpaddedChunk = oids.length.toString(16);
- const paddedChunk = '0'.repeat(8 - unpaddedChunk.length) + unpaddedChunk;
- write(paddedChunk, 'hex');
- for (const oid of oids) {
- const { type, object } = await readObject({ fs, dir, gitdir, oid });
- writeObject(object as Uint8Array, type);
- }
- // Write SHA1 checksum
- const digest = hash.digest();
- outputStream.end(digest);
- return outputStream;
-}
-
-function mux(
- protocol: string, // 'side-band' or 'side-band-64k'
- packetlines: PassThrough,
- packfile: PassThrough,
- progress: PassThrough,
-): PassThrough {
- const MAX_PACKET_LENGTH = protocol === 'side-band-64k' ?
999 : 65519; - const output = new PassThrough(); - packetlines.on('data', (data: Buffer) => { - if (data === null) { - output.write(Buffer.from('0000', 'utf8')); - } else { - output.write(encode(data)); - } - }); - let packfileWasEmpty = true; - let packfileEnded = false; - let progressEnded = false; - const errorEnded = true; - const goodbye = Buffer.concat([ - encode(Buffer.from('010A', 'hex')), - Buffer.from('0000', 'utf8'), - ]); - packfile - .on('data', (data: Buffer) => { - packfileWasEmpty = false; - const buffers = splitBuffer(data, MAX_PACKET_LENGTH); - for (const buffer of buffers) { - output.write(encode(Buffer.concat([Buffer.from('01', 'hex'), buffer]))); - } - }) - .on('end', () => { - packfileEnded = true; - if (!packfileWasEmpty) output.write(goodbye); - if (progressEnded && errorEnded) output.end(); - }); - progress - .on('data', (data: Buffer) => { - const buffers = splitBuffer(data, MAX_PACKET_LENGTH); - for (const buffer of buffers) { - output.write(encode(Buffer.concat([Buffer.from('02', 'hex'), buffer]))); - } - }) - .on('end', () => { - progressEnded = true; - if (packfileEnded && errorEnded) output.end(); - }); - return output; -} - -function splitBuffer(buffer: Buffer, maxBytes: number): Array { - const result: Buffer[] = []; - let index = 0; - while (index < buffer.length) { - const buf = buffer.slice(index, index + maxBytes); - result.push(buf); - index += buf.length; - } - result.push(buffer.slice(index)); - return result; -} - -class BufferCursor { - protected buffer: Buffer; - protected _start: number; - - constructor(buffer: Buffer) { - this.buffer = buffer; - this._start = 0; - } - - eof(): boolean { - return this._start >= this.buffer.length; - } - - tell(): number { - return this._start; - } - - seek(n: number): void { - this._start = n; - } - - slice(n: number): Buffer { - const r = this.buffer.slice(this._start, this._start + n); - this._start += n; - return r; - } - - toString(enc: BufferEncoding, length: number) { - const r = this.buffer.toString(enc, this._start, this._start + length); - this._start += length; - return r; - } - - write(value: string, length: number, enc: BufferEncoding): number { - const r = this.buffer.write(value, this._start, length, enc); - this._start += length; - return r; - } - - copy(source: Buffer, start?: number, end?: number): number { - const r = source.copy(this.buffer, this._start, start, end); - this._start += r; - return r; - } - - readUInt8(): number { - const r = this.buffer.readUInt8(this._start); - this._start += 1; - return r; - } - - writeUInt8(value: number): number { - const r = this.buffer.writeUInt8(value, this._start); - this._start += 1; - return r; - } - - readUInt16BE(): number { - const r = this.buffer.readUInt16BE(this._start); - this._start += 2; - return r; - } - - writeUInt16BE(value: number): number { - const r = this.buffer.writeUInt16BE(value, this._start); - this._start += 2; - return r; - } - - readUInt32BE(): number { - const r = this.buffer.readUInt32BE(this._start); - this._start += 4; - return r; - } - - writeUInt32BE(value: number): number { - const r = this.buffer.writeUInt32BE(value, this._start); - this._start += 4; - return r; - } } export { - createGitPacketLine, - uploadPack, - packObjects, - pack, - mux, - iteratorFromData, - encode, - fromIdx, - listRefs, - resolve, - readObject, + PACK_CHUNK_SIZE, + HEAD_REFERENCE, + REFERENCES_STRING, + SIDE_BAND_64_CAPABILITY, + AGENT_CAPABILITY, + SPACE_STRING, + CHANNEL_DATA, + CHANNEL_PROGRESS, + CHANNEL_ERROR, + BUFFER_FORMAT, + 
REFERENCE_DISCOVERY_HEADER,
+ NULL_BUFFER,
+ LINE_FEED_BUFFER,
+ ZERO_ID_BUFFER,
+ EMPTY_LIST_CAPABILITIES_BUFFER,
+ SPACE_BUFFER,
+ FLUSH_PACKET_BUFFER,
+ NAK_BUFFER,
+ DUMMY_PROGRESS_BUFFER,
+ listReferencesGenerator,
+ referenceCapability,
+ listObjects,
+ parseRequestLine,
+ isObjectId,
+ assertObjectId,
+ isRequestType,
+ assertRequestType,
};
diff --git a/src/nodes/agent/handlers/VaultsGitPackGet.ts b/src/nodes/agent/handlers/VaultsGitPackGet.ts
index 9fdec6f82..d5c38b531 100644
--- a/src/nodes/agent/handlers/VaultsGitPackGet.ts
+++ b/src/nodes/agent/handlers/VaultsGitPackGet.ts
@@ -1,6 +1,5 @@
 import type { DB } from '@matrixai/db';
 import type { JSONObject, JSONRPCRequest } from '@matrixai/rpc';
-import type { PassThrough } from 'readable-stream';
 import type { VaultName } from '../../../vaults/types';
 import type ACL from '../../../acl/ACL';
 import type VaultManager from '../../../vaults/VaultManager';
@@ -69,37 +68,22 @@ class VaultsGitPackGet extends RawHandler<{
 }
 // Getting data
- let sideBand: PassThrough;
- let progressStream: PassThrough;
- const outputStream = new ReadableStream({
- start: async (controller) => {
- const body = new Array();
+ let packRequestGen: AsyncGenerator<Buffer, void, void>;
+ const outputStream = new ReadableStream({
+ start: async () => {
+ const body: Array<Buffer> = [];
 for await (const message of inputStream) {
- body.push(message);
+ body.push(Buffer.from(message));
 }
- [sideBand, progressStream] = await vaultManager.handlePackRequest(
- vaultId,
- Buffer.concat(body),
- );
- controller.enqueue(Buffer.from('0008NAK\n'));
- sideBand.on('data', async (data: Uint8Array) => {
- controller.enqueue(data);
- sideBand.pause();
- });
- sideBand.on('end', async () => {
- controller.close();
- });
- sideBand.on('error', (e) => {
- controller.error(e);
- });
- progressStream.write(Buffer.from('0014progress is at 50%\n'));
- progressStream.end();
+ packRequestGen = vaultManager.handlePackRequest(vaultId, body);
 },
- pull: () => {
- sideBand.resume();
+ pull: async (controller) => {
+ const next = await packRequestGen.next();
+ if (next.done === true) return controller.close();
+ controller.enqueue(next.value);
 },
- cancel: (e) => {
- sideBand.destroy(e);
+ cancel: async () => {
+ await packRequestGen.return();
 },
 });
 return [{}, outputStream];
diff --git a/src/vaults/VaultInternal.ts b/src/vaults/VaultInternal.ts
index 38c830493..2575144cc 100644
--- a/src/vaults/VaultInternal.ts
+++ b/src/vaults/VaultInternal.ts
@@ -145,7 +145,6 @@ class VaultInternal {
 }),
 );
 }
- const vaultIdEncoded = vaultsUtils.encodeVaultId(vaultId);
 logger.info(`Cloning ${this.name} - ${vaultIdEncoded}`);
 const vault = new this({
@@ -169,6 +168,7 @@ class VaultInternal {
 targetNodeId,
 async (connection) => {
 const client = connection.getClient();
+
 const [request, vaultName, remoteVaultId] = await vault.request(
 client,
 targetVaultNameOrId,
@@ -766,6 +766,43 @@ class VaultInternal {
 return commitIdLatest;
 }
+ /**
+ * Creates a request arrow function that implements the API that `isomorphic-git` expects when making an HTTP
+ * request. It makes RPC calls to `vaultsGitInfoGet` for the ref advertisement phase and `vaultsGitPackGet` for the
+ * git pack phase.
+ *
+ * `vaultsGitInfoGet` wraps a call to `gitHttp.advertiseRefGenerator` and `vaultsGitPackGet` wraps
+ * `gitHttp.generatePackRequest`.
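 * (Editorial aside: the shape assumed here follows isomorphic-git's custom HTTP
 * client interface — `request({ url, method, headers, body })` resolving to
 * `{ url, method, headers, body, statusCode, statusMessage }`, where either `body`
 * may be an async iterable of `Uint8Array`; that is what lets the RPC readable
 * streams below be passed through without buffering.)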
+ *
+ * ```
+ * ┌─────────┐    ┌───────────────────────────┐
+ * │         │    │                           │
+ * ┌──────────────────────┐  │  RPC    │    │                           │
+ * │                      │  │         │    │  *advertiseRefGenerator() │
+ * │                      ├────────┼─────────┼────►                      │
+ * │   vault.request()    │  │         │    │                           │
+ * │                      │  │         │    └────┬──────────────────────┘
+ * │                      ├──┐  │         │         │
+ * │                      │  │  │         │    ┌────▼──────────────────────┐
+ * └──────────────────────┘  │  │         │    │                           │
+ *    │  │         │    │ *referenceListGenerator() │
+ *    │  │         │    │                           │
+ *    │  │         │    └───────────────────────────┘
+ *    │  │         │
+ *    │  │         │    ┌───────────────────────────┐
+ *    └─────┼─────────┼────┤                           │
+ *       │         │    │  *generatePackRequest()   │
+ *       │         │    │                           │
+ *       │         │    └────┬──────────────────────┘
+ *       └─────────┘         │
+ *                      ┌────▼──────────────────────┐
+ *                      │                           │
+ *                      │   *generatePackData()     │
+ *                      │                           │
+ *                      └───────────────────────────┘
+ *
+ * ```
+ */
 protected async request(
 client: RPCClient,
 vaultNameOrId: VaultId | VaultName,
@@ -793,11 +830,6 @@
 const vaultName = result.vaultName;
 const remoteVaultId = ids.parseVaultId(result.vaultIdEncoded);
- // Collect the response buffers from the GET request
- const infoResponse: Uint8Array[] = [];
- for await (const chunk of vaultsGitInfoGetStream.readable) {
- infoResponse.push(chunk);
- }
 return [
 async function ({
 url,
 method = 'GET',
 headers = {},
 body = [Buffer.from('')],
 }: {
 url: string;
 method: string;
 headers: POJO;
- body: Buffer[];
+ body: Array<Buffer>;
 }) {
 if (method === 'GET') {
 // Send back the GET request info response
 return {
 url: url,
 method: method,
- body: infoResponse,
+ body: vaultsGitInfoGetStream.readable,
 headers: headers,
 statusCode: 200,
 statusMessage: 'OK',
 };
 } else if (method === 'POST') {
- const responseBuffers: Array<Uint8Array> = [];
 const vaultsGitPackGetStream = await client.methods.vaultsGitPackGet({
 nameOrId: result.vaultIdEncoded as string,
 vaultAction,
 });
 const writer = vaultsGitPackGetStream.writable.getWriter();
 await writer.write(body[0]);
 await writer.close();
- for await (const value of vaultsGitPackGetStream.readable) {
- responseBuffers.push(value);
- }
+
 return {
 url: url,
 method: method,
- body: responseBuffers,
+ body: vaultsGitPackGetStream.readable,
 headers: headers,
 statusCode: 200,
 statusMessage: 'OK',
diff --git a/src/vaults/VaultManager.ts b/src/vaults/VaultManager.ts
index c27a8e660..22f45305e 100644
--- a/src/vaults/VaultManager.ts
+++ b/src/vaults/VaultManager.ts
@@ -21,7 +21,6 @@ import type { LockRequest } from '@matrixai/async-locks';
 import type { Key } from '../keys/types';
 import path from 'path';
 import { DB } from '@matrixai/db';
-import { PassThrough } from 'readable-stream';
 import { EncryptedFS, errors as encryptedFsErrors } from 'encryptedfs';
 import Logger from '@matrixai/logger';
 import {
@@ -36,8 +35,7 @@ import * as vaultsEvents from './events';
 import * as vaultsUtils from './utils';
 import * as vaultsErrors from './errors';
 import * as utils from '../utils';
-import * as gitUtils from '../git/utils';
-import * as gitErrors from '../git/errors';
+import * as gitHttp from '../git/http';
 import * as nodesUtils from '../nodes/utils';
 import * as keysUtils from '../keys/utils';
 import config from '../config';
@@ -806,7 +804,7 @@ class VaultManager {
 public async *handleInfoRequest(
 vaultId: VaultId,
 tran?: DBTransaction,
- ): AsyncGenerator {
+ ): AsyncGenerator<Buffer, void, void> {
 if (tran == null) {
 const handleInfoRequest = (tran) => this.handleInfoRequest(vaultId, tran);
 return yield* this.db.withTransactionG(async function* (tran) {
@@ -820,22 +818,13 @@
 this.vaultLocks.lock([vaultId.toString(), RWLockWriter, 'read']),
 vault.getLock().read(),
 ],
- async function* (): AsyncGenerator {
- // Adherence to git protocol
- yield Buffer.from(
- gitUtils.createGitPacketLine('# service=git-upload-pack\n'),
- );
- yield Buffer.from('0000');
+ async function* (): AsyncGenerator<Buffer, void, void> {
 // Read the commit state of the vault
- const uploadPack = await gitUtils.uploadPack({
- fs: efs,
+ yield* gitHttp.advertiseRefGenerator({
+ efs,
 dir: path.join(vaultsUtils.encodeVaultId(vaultId), 'contents'),
- gitdir: path.join(vaultsUtils.encodeVaultId(vaultId), '.git'),
- advertiseRefs: true,
+ gitDir: path.join(vaultsUtils.encodeVaultId(vaultId), '.git'),
 });
- for (const buffer of uploadPack) {
- yield buffer;
- }
 },
 );
 }
@@ -845,50 +834,34 @@
 * cloned or pulled from
 */
 @ready(new vaultsErrors.ErrorVaultManagerNotRunning())
- public async handlePackRequest(
+ public async *handlePackRequest(
 vaultId: VaultId,
- body: Buffer,
+ body: Array<Buffer>,
 tran?: DBTransaction,
- ): Promise<[PassThrough, PassThrough]> {
+ ): AsyncGenerator<Buffer, void, void> {
 if (tran == null) {
- return this.db.withTransactionF((tran) =>
- this.handlePackRequest(vaultId, body, tran),
- );
+ // Lambda to maintain `this` context
+ const handlePackRequest = (tran: DBTransaction) =>
+ this.handlePackRequest(vaultId, body, tran);
+ return yield* this.db.withTransactionG(async function* (tran) {
+ return yield* handlePackRequest(tran);
+ });
 }
 const vault = await this.getVault(vaultId, tran);
- return await withF(
+ const efs = this.efs;
+ yield* withG(
 [
 this.vaultLocks.lock([vaultId.toString(), RWLockWriter, 'read']),
 vault.getLock().read(),
 ],
- async () => {
- if (body.toString().slice(4, 8) === 'want') {
- // Parse the request to get the wanted git object
- const wantedObjectId = body.toString().slice(9, 49);
- const packResult = await gitUtils.packObjects({
- fs: this.efs,
- dir: path.join(vaultsUtils.encodeVaultId(vaultId), 'contents'),
- gitdir: path.join(vaultsUtils.encodeVaultId(vaultId), '.git'),
- refs: [wantedObjectId],
- });
- // Generate a contents and progress stream
- const readable = new PassThrough();
- const progressStream = new PassThrough();
- const sideBand = gitUtils.mux(
- 'side-band-64',
- readable,
- packResult.packstream,
- progressStream,
- );
- return [sideBand, progressStream];
- } else {
- throw new gitErrors.ErrorGitUnimplementedMethod(
- `Request of type '${body
- .toString()
- .slice(4, 8)}' not valid, expected 'want'`,
- );
- }
+ async function* (): AsyncGenerator<Buffer, void, void> {
+ yield* gitHttp.generatePackRequest({
+ efs,
+ dir: path.join(vaultsUtils.encodeVaultId(vaultId), 'contents'),
+ gitDir: path.join(vaultsUtils.encodeVaultId(vaultId), '.git'),
+ body: body,
+ });
 },
 );
 }
diff --git a/src/vaults/utils.ts b/src/vaults/utils.ts
index e9f216e62..53397c8f8 100644
--- a/src/vaults/utils.ts
+++ b/src/vaults/utils.ts
@@ -43,7 +43,10 @@ function commitAuthor(nodeId: NodeId): { name: string; email: string } {
 };
 }

-async function* readDirRecursively(fs, dir = '.') {
+async function* readDirRecursively(
+ fs: FileSystemReadable,
+ dir = '.',
+): AsyncGenerator<string, void, void> {
 const dirents = await fs.promises.readdir(dir);
 for (const dirent of dirents) {
 const res = path.join(dir, dirent.toString());
diff --git a/tests/git/http.test.ts b/tests/git/http.test.ts
new file mode 100644
index 000000000..898a4ab63
--- /dev/null
+++ b/tests/git/http.test.ts
@@ -0,0 +1,413 @@
+import fs from 'fs';
+import path from 'path';
+import os from 'os';
+import git from 'isomorphic-git';
+import { test } from '@fast-check/jest';
+import fc from 'fast-check';
+import * as gitHttp from '@/git/http';
+import * as validationErrors from '@/validation/errors';
+import * as gitTestUtils from './utils';
+
+describe('Git Http', () => {
+ let dataDir: string;
+ // This is a collection of commonly used objects that we can spread into calls
+ let gitDirs: {
+ efs: any; // Any here to act as fs or the efs since they overlap enough for testing
+ fs: any;
+ dir: string;
+ gitDir: string;
+ gitdir: string;
+ };
+ beforeAll(async () => {
+ dataDir = await fs.promises.mkdtemp(
+ path.join(os.tmpdir(), 'polykey-test-'),
+ );
+ const dir = path.join(dataDir, 'repository');
+ const gitdir = path.join(dir, '.git');
+ gitDirs = {
+ efs: fs,
+ fs,
+ dir,
+ gitDir: gitdir,
+ gitdir,
+ };
+ });
+ afterAll(async () => {
+ await fs.promises.rm(dataDir, {
+ force: true,
+ recursive: true,
+ });
+ });
+ test('packetLine', async () => {
+ /**
+ * Pkt-line          actual value
+ * ---------------------------------
+ * "0006a\n"         "a\n"
+ * "0005a"           "a"
+ * "000bfoobar\n"    "foobar\n"
+ * "0004"            ""
+ */
+ const tests = [
+ ['0006a\n', 'a\n'],
+ ['0005a', 'a'],
+ ['000bfoobar\n', 'foobar\n'],
+ ['0004', ''],
+ ];
+ for (const [output, input] of tests) {
+ const result = gitHttp.packetLineBuffer(Buffer.from(input));
+ const comp = Buffer.compare(result, Buffer.from(output));
+ expect(comp).toBe(0);
+ }
+ });
+ test('packetLineWithChannel', async () => {
+ /**
+ * Pkt-line          actual value
+ * ---------------------------------
+ * "0007a\n"         "a\n"
+ * "0006a"           "a"
+ * "000cfoobar\n"    "foobar\n"
+ * "0005"            ""
+ */
+ const tests = [
+ ['0007\x01a\n', 'a\n'],
+ ['0006\x01a', 'a'],
+ ['000c\x01foobar\n', 'foobar\n'],
+ ['0005\x01', ''],
+ ];
+ for (const [output, input] of tests) {
+ const result = gitHttp.packetLineBuffer(Buffer.from(input), 1);
+ const comp = Buffer.compare(result, Buffer.from(output));
+ expect(comp).toBe(0);
+ }
+ });
+ test('advertiseRefGenerator', async () => {
+ await gitTestUtils.createGitRepo({
+ ...gitDirs,
+ author: 'tester',
+ commits: [
+ {
+ message: 'commit1',
+ files: [
+ {
+ name: 'file1',
+ contents: 'this is a file',
+ },
+ ],
+ },
+ {
+ message: 'commit2',
+ files: [
+ {
+ name: 'file2',
+ contents: 'this is another file',
+ },
+ ],
+ },
+ {
+ message: 'commit3',
+ files: [
+ {
+ name: 'file1',
+ contents: 'this is a changed file',
+ },
+ ],
+ },
+ ],
+ });
+ const gen = gitHttp.advertiseRefGenerator(gitDirs);
+ let response = '';
+ for await (const result of gen) {
+ response += result.toString();
+ }
+ // Header
+ expect(response).toInclude('001e# service=git-upload-pack\n');
+ // Includes flush packets
+ expect(response).toInclude('0000');
+ // Includes capabilities
+ expect(response).toIncludeMultiple([
+ 'side-band-64k',
+ 'symref=HEAD:refs/heads/master',
+ 'agent=git/isomorphic-git@1.8.1',
+ ]);
+ // HEAD commit is listed twice as `HEAD` and `master`
+ const headCommit = (await git.log({ ...gitDirs, ref: 'HEAD' }))[0].oid;
+ expect(response).toIncludeRepeated(headCommit, 2);
+ // `HEAD` and `master` are both listed
+ expect(response).toIncludeMultiple(['HEAD', 'master']);
+ // A null byte is included to delimit first line and capabilities
+ expect(response).toInclude('\0');
+ });
+ test('parsePackRequest', async () => {
+ const data = Buffer.from(
+ `0060want aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa side-band-64k agent=git/isomorphic-git@1.24.5\n0032have bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n00000009done\n`,
+ );
+ const [wants, haves, capabilities] = await gitHttp.parsePackRequest([data]);
+ expect(wants).toMatchObject(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa']);
+ expect(haves).toMatchObject(['bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb']);
+ expect(capabilities).toMatchObject([
+ 'side-band-64k',
+ 'agent=git/isomorphic-git@1.24.5',
+ ]);
+ });
+ test.prop([fc.uint8Array({ minLength: 100 })])(
+ 'parsePackRequest handles random data',
+ async (data) => {
+ await expect(
+ gitHttp.parsePackRequest([Buffer.from(data)]),
+ ).rejects.toThrow(validationErrors.ErrorParse);
+ },
+ );
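// (Editorial aside, not part of the diff: the negotiation body parsed above is a
// sequence of pkt-lines terminated by a flush packet and a done packet. The length
// prefix counts everything in the line, e.g. the `0060want …` line above is
// 0x60 = 96 = 4 (prefix) + 5 ('want ') + 40 (object id) + 46 (space-separated
// capabilities) + 1 (LF), and '0009done\n' is 4 + 4 + 1.)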
+ test('generatePackData', async () => {
+ await gitTestUtils.createGitRepo({
+ ...gitDirs,
+ author: 'tester',
+ commits: [
+ {
+ message: 'commit1',
+ files: [
+ {
+ name: 'file1',
+ contents: 'this is a file',
+ },
+ ],
+ },
+ {
+ message: 'commit2',
+ files: [
+ {
+ name: 'file2',
+ contents: 'this is another file',
+ },
+ ],
+ },
+ {
+ message: 'commit3',
+ files: [
+ {
+ name: 'file1',
+ contents: 'this is a changed file',
+ },
+ ],
+ },
+ ],
+ });
+ const objectIds = await gitTestUtils.listGitObjects(gitDirs);
+ const gen = gitHttp.generatePackData({
+ ...gitDirs,
+ objectIds,
+ });
+ let acc = Buffer.alloc(0);
+ for await (const line of gen) {
+ acc = Buffer.concat([acc, line.subarray(5)]);
+ }
+ const packPath = path.join(gitDirs.dir, 'pack');
+ await fs.promises.writeFile(packPath, acc);
+ // Checking that all objectIds are included and the pack file is valid using isomorphic-git
+ const result = await git.indexPack({
+ ...gitDirs,
+ filepath: 'pack',
+ });
+ expect(result.oids).toIncludeAllMembers(objectIds);
+ });
+ test('generatePackRequest', async () => {
+ await gitTestUtils.createGitRepo({
+ ...gitDirs,
+ author: 'tester',
+ commits: [
+ {
+ message: 'commit1',
+ files: [
+ {
+ name: 'file1',
+ contents: 'this is a file',
+ },
+ ],
+ },
+ {
+ message: 'commit2',
+ files: [
+ {
+ name: 'file2',
+ contents: 'this is another file',
+ },
+ ],
+ },
+ {
+ message: 'commit3',
+ files: [
+ {
+ name: 'file1',
+ contents: 'this is a changed file',
+ },
+ ],
+ },
+ ],
+ });
+ const gen = gitHttp.generatePackRequest({
+ ...gitDirs,
+ body: [],
+ });
+ let response = '';
+ for await (const line of gen) {
+ response += line.toString();
+ }
+ // NAK response for no common objects
+ expect(response).toInclude('0008NAK\n');
+ // Pack data included on channel 1
+ expect(response).toInclude('\x01PACK');
+ // Progress data included on channel 2
+ expect(response).toInclude('0017\x02progress is at 50%');
+ // Flush packet included
+ expect(response).toInclude('0000');
+ });
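// (Editorial aside, not part of the diff: the assertions above reflect side-band-64k
// multiplexing — each pkt-line payload carries a leading channel byte, 0x01 for pack
// data, 0x02 for progress, 0x03 for errors. Hence '0017\x02progress is at 50%' is
// 0x17 = 23 bytes: 4 (length prefix) + 1 (channel) + 18 (payload).)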
+ test('end to end clone', async () => {
+ await gitTestUtils.createGitRepo({
+ ...gitDirs,
+ author: 'tester',
+ commits: [
+ {
+ message: 'commit1',
+ files: [
+ {
+ name: 'file1',
+ contents: 'this is a file',
+ },
+ ],
+ },
+ {
+ message: 'commit2',
+ files: [
+ {
+ name: 'file2',
+ contents: 'this is another file',
+ },
+ ],
+ },
+ {
+ message: 'commit3',
+ files: [
+ {
+ name: 'file1',
+ contents: 'this is a changed file',
+ },
+ ],
+ },
+ ],
+ });
+
+ const request = gitTestUtils.request(gitDirs);
+ const newDir = path.join(dataDir, 'newRepo');
+ const newDirs = {
+ fs,
+ dir: newDir,
+ gitdir: path.join(newDir, '.git'),
+ gitDir: path.join(newDir, '.git'),
+ };
+
+ await git.clone({
+ fs,
+ dir: newDir,
+ http: { request },
+ url: 'http://',
+ });
+ // Files are checked out and correct
+ expect(
+ (await fs.promises.readFile(path.join(newDirs.dir, 'file1'))).toString(),
+ ).toBe('this is a changed file');
+ expect(
+ (await fs.promises.readFile(path.join(newDirs.dir, 'file2'))).toString(),
+ ).toBe('this is another file');
+ });
+ test('end to end pull', async () => {
+ await gitTestUtils.createGitRepo({
+ ...gitDirs,
+ author: 'tester',
+ commits: [
+ {
+ message: 'commit1',
+ files: [
+ {
+ name: 'file1',
+ contents: 'this is a file',
+ },
+ ],
+ },
+ {
+ message: 'commit2',
+ files: [
+ {
+ name: 'file2',
+ contents: 'this is another file',
+ },
+ ],
+ },
+ {
+ message: 'commit3',
+ files: [
+ {
+ name: 'file1',
+ contents: 'this is a changed file',
+ },
+ ],
+ },
+ ],
+ });
+ const newDir = path.join(dataDir, 'newRepo');
+ const newDirs = {
+ fs,
+ dir: newDir,
+ gitdir: path.join(newDir, '.git'),
+ gitDir: path.join(newDir, '.git'),
+ };
+ const request = gitTestUtils.request(gitDirs);
+ await git.clone({
+ fs,
+ dir: newDir,
+ http: { request },
+ url: 'http://',
+ });
+ // Add more history
+ await gitTestUtils.createGitRepo({
+ ...gitDirs,
+ init: false,
+ author: 'tester',
+ commits: [
+ {
+ message: 'commit4',
+ files: [
+ {
+ name: 'file3',
+ contents: 'this is another file3',
+ },
+ ],
+ },
+ ],
+ });
+ await git.pull({
+ ...newDirs,
+ http: { request },
+ url: 'http://',
+ ref: 'HEAD',
+ singleBranch: true,
+ fastForward: true,
+ fastForwardOnly: true,
+ author: {
+ name: 'asd',
+ },
+ });
+ // After pulling, we expect the new repo to include all history from the old
+ const logOld = (
+ await git.log({
+ ...gitDirs,
+ ref: 'HEAD',
+ })
+ ).map((v) => v.oid);
+ const logNew = (
+ await git.log({
+ ...newDirs,
+ ref: 'HEAD',
+ })
+ ).map((v) => v.oid);
+ expect(logNew).toIncludeAllMembers(logOld);
+ });
+});
diff --git a/tests/git/utils.test.ts b/tests/git/utils.test.ts
index 0ea8abe06..d09bdd1e5 100644
--- a/tests/git/utils.test.ts
+++ b/tests/git/utils.test.ts
@@ -1,247 +1,248 @@
-import type { ReadCommitResult } from 'isomorphic-git';
-import type { PackIndex } from '@/git/types';
 import fs from 'fs';
 import os from 'os';
 import path from 'path';
-import { EncryptedFS } from 'encryptedfs';
-import Logger, { LogLevel, StreamHandler } from '@matrixai/logger';
-import * as gitErrors from '@/git/errors';
-import * as keysUtils from '@/keys/utils';
+import git from 'isomorphic-git';
+import { test } from '@fast-check/jest';
+import fc from 'fast-check';
 import * as gitUtils from '@/git/utils';
+import * as validationErrors from '@/validation/errors';
 import * as gitTestUtils from './utils';

describe('Git utils', () => {
- const logger = new Logger('VaultManager Test', LogLevel.WARN, [
- new StreamHandler(),
- ]);
 let dataDir: string;
- let commits: ReadCommitResult[];
- let firstCommit: ReadCommitResult;
- let objectsPath: string;
- let efs: EncryptedFS;
- let dbKey: Buffer;
-
+ let gitDirs: {
+ efs: any; // Any here to act as fs or the efs since they overlap enough for testing
+ fs: any;
+ dir: string;
+ gitDir: string;
+ gitdir: string;
+ };
 beforeAll(async () => {
 dataDir = await fs.promises.mkdtemp(
 path.join(os.tmpdir(), 'polykey-test-'),
 );
- objectsPath = path.join('.git', 'objects');
- dbKey = keysUtils.generateKey();
- efs = await EncryptedFS.createEncryptedFS({
- dbKey,
- dbPath: dataDir,
- logger,
- });
- await efs.start();
- commits = await gitTestUtils.createGitRepo({
- efs,
- packFile: true,
- indexFile: true,
- });
- firstCommit = commits[0];
+ const dir = path.join(dataDir, 'repository');
+ const gitdir = path.join(dir, '.git');
+ gitDirs = {
+ efs: fs,
+ fs,
+ dir,
+ gitDir: gitdir,
+ gitdir,
+ };
 });
-
 afterAll(async () => {
- await efs.stop();
- await efs.destroy();
 await fs.promises.rm(dataDir, {
 force: true,
 recursive: true,
 });
 });
- describe('read index', () => {
- test('of a packfile', async () => {
- const packDir = path.join('.git', 'objects', 'pack');
const packfile = (await efs.promises.readdir(packDir))[0] as string; - const idx = (await efs.promises.readFile( - path.join(packDir, packfile), - )) as Buffer; - const p = gitUtils.fromIdx(idx) as PackIndex; - expect(p).not.toBeUndefined(); - const packSha = packfile.substring(5, 45); - expect(p.packfileSha).toBe(packSha); - const oids = commits.map((commit) => commit.oid); - for (const oid of oids) { - expect(p.offsets.has(oid)).toBeTruthy(); - } + + test('listReferencesGenerator', async () => { + // Start with creating a git repo with commits + await gitTestUtils.createGitRepo({ + ...gitDirs, + author: 'tester', + commits: [ + { + message: 'commit1', + files: [ + { + name: 'file1', + contents: 'this is a file', + }, + ], + }, + { + message: 'commit2', + files: [ + { + name: 'file2', + contents: 'this is another file', + }, + ], + }, + { + message: 'commit3', + files: [ + { + name: 'file1', + contents: 'this is a changed file', + }, + ], + }, + ], }); + + const headObjectId = ( + await git.log({ + ...gitDirs, + depth: 1, + }) + )[0].oid; + const expectedReferences = ['HEAD', 'refs/heads/master']; + for await (const [reference, objectId] of gitUtils.listReferencesGenerator({ + ...gitDirs, + })) { + expect(reference).toBeOneOf(expectedReferences); + expect(objectId).toBe(headObjectId); + } }); - describe('list refs', () => { - test('on master', async () => { - const refs = await gitUtils.listRefs( - efs, - '.git', - path.join('refs', 'heads'), - ); - expect(refs).toEqual(['master']); - }); + test('refCapability', async () => { + await gitTestUtils.createGitRepo({ + ...gitDirs, + author: 'tester', + commits: [ + { + message: 'commit1', + files: [ + { + name: 'file1', + contents: 'this is a file', + }, + ], + }, + { + message: 'commit2', + files: [ + { + name: 'file2', + contents: 'this is another file', + }, + ], + }, + { + message: 'commit3', + files: [ + { + name: 'file1', + contents: 'this is a changed file', + }, + ], + }, + ], + }); + const branches = await git.listBranches({ ...gitDirs }); + for (const reference of ['HEAD', ...branches]) { + const referenceCapability = await gitUtils.referenceCapability({ + ...gitDirs, + reference, + }); + // Includes the `symref` indicator of the capability + expect(referenceCapability).toInclude('symref='); + // The `:` separator + expect(referenceCapability).toInclude(':'); + // No spaces + expect(referenceCapability).not.toInclude(' '); + } }); - describe('encoding', () => { - test('a string', async () => { - const gitEncodedString = gitUtils.encode('hello world\n'); - expect(gitEncodedString.equals(Buffer.from('0010hello world\n'))).toBe( - true, - ); - }); - test('an empty string', async () => { - const gitEncodedString = gitUtils.encode(''); - expect(gitEncodedString.equals(Buffer.from('0004'))).toBe(true); - }); - test('an upload pack', async () => { - const uploadPackBuffers = (await gitUtils.uploadPack({ - fs: efs, - advertiseRefs: true, - })) as Buffer[]; - const uploadPack = Buffer.concat(uploadPackBuffers); - expect(uploadPack.toString('utf8')).toBe( - `007d${firstCommit.oid} HEAD\0side-band-64k symref=HEAD:refs/heads/master agent=git/isomorphic-git@1.8.1 -003f${firstCommit.oid} refs/heads/master -0000`, - ); + test('listObjects', async () => { + await gitTestUtils.createGitRepo({ + ...gitDirs, + author: 'tester', + commits: [ + { + message: 'commit1', + files: [ + { + name: 'file1', + contents: 'this is a file', + }, + ], + }, + { + message: 'commit2', + files: [ + { + name: 'file2', + contents: 'this is another file', + }, + ], + }, + { 
+ message: 'commit3', + files: [ + { + name: 'file1', + contents: 'this is a changed file', + }, + ], + }, + ], }); + + const commitIds = ( + await git.log({ + ...gitDirs, + ref: 'HEAD', + }) + ).map((v) => v.oid); + + const objectList = await gitUtils.listObjects({ + ...gitDirs, + wants: commitIds, + haves: [], + }); + const expectedObjectIds = await gitTestUtils.listGitObjects(gitDirs); + // Found objects should include all the commits + expect(objectList).toIncludeAllMembers(commitIds); + // Since it was an exhaustive walk of all commits, all objectIds should be included + expect(objectList).toIncludeAllMembers(expectedObjectIds); }); - describe('resolve refs', () => { - test('to a commit oid', async () => { - const ref = await gitUtils.resolve({ - fs: efs, - ref: commits[0].oid, - }); - expect(ref).toBe(firstCommit.oid); - }); - test('to HEAD', async () => { - const ref = await gitUtils.resolve({ fs: efs, ref: 'HEAD' }); - expect(ref).toBe(firstCommit.oid); - }); - test('to HEAD including depth', async () => { - const ref = await gitUtils.resolve({ fs: efs, ref: 'HEAD', depth: 2 }); - expect(ref).toBe('refs/heads/master'); - }); - test('to non-existant refs', async () => { - await expect(() => - gitUtils.resolve({ fs: efs, ref: 'this-is-not-a-ref' }), - ).rejects.toThrow(gitErrors.ErrorGitUndefinedRefs); - }); + test.prop([ + gitTestUtils.gitRequestDataArb, + fc.uint8Array({ size: 'medium' }), + ])('parseRequestLine', async (lineData, rest) => { + const data = gitTestUtils.generateGitNegotiationLine( + lineData, + Buffer.from(rest), + ); + const result = gitUtils.parseRequestLine(data); + switch (lineData.type) { + case 'want': + { + expect(result).toBeDefined(); + const [type, objectId, capabilityList, resultRest] = result!; + expect(type).toBe(lineData.type); + expect(objectId).toBe(lineData.objectId); + expect(capabilityList).toMatchObject(lineData.capabilityList); + expect(Buffer.compare(resultRest, rest)).toBe(0); + } + break; + case 'have': + { + expect(result).toBeDefined(); + const [type, objectId, capabilityList, resultRest] = result!; + expect(type).toBe(lineData.type); + expect(objectId).toBe(lineData.objectId); + expect(capabilityList.length).toBe(0); + expect(Buffer.compare(resultRest, rest)).toBe(0); + } + break; + case 'SEPARATOR': + case 'done': + { + expect(result).toBeDefined(); + const [type, objectId, capabilityList, resultRest] = result!; + expect(type).toBe(lineData.type); + expect(objectId).toBe(''); + expect(capabilityList.length).toBe(0); + expect(Buffer.compare(resultRest, rest)).toBe(0); + } + break; + case 'none': + { + expect(result).toBeUndefined(); + } + break; + } }); - describe('read an object', () => { - test('missing', async () => { - await expect(() => - gitUtils.readObject({ - fs: efs, - dir: '.', - gitdir: '.git', - oid: 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', - }), - ).rejects.toThrow(gitErrors.ErrorGitReadObject); - }); - test('parsed', async () => { - const ref = await gitUtils.readObject({ - fs: efs, - dir: '.', - gitdir: '.git', - oid: firstCommit.oid, - }); - expect(ref.format).toEqual('parsed'); - expect(ref.type).toEqual('commit'); - }); - test('content', async () => { - const ref = await gitUtils.readObject({ - fs: efs, - dir: '.', - gitdir: '.git', - oid: firstCommit.oid, - format: 'content', - }); - expect(ref.format).toEqual('content'); - expect(ref.type).toEqual('commit'); - expect(ref.source).toBe( - path.join( - 'objects', - firstCommit.oid.substring(0, 2), - firstCommit.oid.substring(2), - ), - ); - const object = 
ref.object.toString(); - expect(object).toContain(firstCommit.commit.tree); - expect(object).toContain(firstCommit.commit.parent[0]); - expect(object).toContain(firstCommit.commit.author.name); - expect(object).toContain(firstCommit.commit.author.timestamp.toString()); - expect(object).toContain(firstCommit.commit.committer.name); - expect(object).toContain( - firstCommit.commit.committer.timestamp.toString(), - ); - }); - test('wrapped', async () => { - const ref = await gitUtils.readObject({ - fs: efs, - dir: '.', - gitdir: '.git', - oid: firstCommit.oid, - format: 'wrapped', - }); - expect(ref.format).toEqual('wrapped'); - expect(ref.type).toEqual('wrapped'); - expect(ref.source).toBe( - path.join( - 'objects', - firstCommit.oid.substring(0, 2), - firstCommit.oid.substring(2), - ), - ); - const object = ref.object.toString(); - expect(object).toContain(firstCommit.commit.tree); - expect(object).toContain(firstCommit.commit.parent[0]); - expect(object).toContain(firstCommit.commit.author.name); - expect(object).toContain(firstCommit.commit.author.timestamp.toString()); - expect(object).toContain(firstCommit.commit.committer.name); - expect(object).toContain( - firstCommit.commit.committer.timestamp.toString(), + test.prop([fc.uint8Array({ size: 'medium', minLength: 1 }).noShrink()])( + 'parseRequestLine handles bad data', + async (randomData) => { + expect(() => gitUtils.parseRequestLine(Buffer.from(randomData))).toThrow( + validationErrors.ErrorParse, ); - }); - test('deflated', async () => { - const ref = await gitUtils.readObject({ - fs: efs, - dir: '.', - gitdir: '.git', - oid: firstCommit.oid, - format: 'deflated', - }); - expect(ref.format).toEqual('deflated'); - expect(ref.type).toEqual('deflated'); - expect(ref.source).toBe( - path.join( - 'objects', - firstCommit.oid.substring(0, 2), - firstCommit.oid.substring(2), - ), - ); - }); - test('from packfile', async () => { - const packName = await gitTestUtils.getPackID(efs); - await efs.promises.rename( - path.join(objectsPath, firstCommit.oid.substring(0, 2)), - path.join(objectsPath, 'TEST'), - ); - const ref = await gitUtils.readObject({ - fs: efs, - dir: '.', - gitdir: '.git', - oid: firstCommit.oid, - format: 'deflated', - }); - expect(ref.format).toEqual('content'); - expect(ref.type).toEqual('commit'); - expect(ref.source).toBe( - path.join('objects', 'pack', `pack-${packName}.pack`), - ); - const object = ref.object.toString(); - expect(object).toContain(firstCommit.commit.tree); - expect(object).toContain(firstCommit.commit.parent[0]); - expect(object).toContain(firstCommit.commit.author.name); - expect(object).toContain(firstCommit.commit.author.timestamp.toString()); - expect(object).toContain(firstCommit.commit.committer.name); - expect(object).toContain( - firstCommit.commit.committer.timestamp.toString(), - ); - }); - }); + }, + ); }); diff --git a/tests/git/utils.ts b/tests/git/utils.ts index d515574c3..dbcafaafb 100644 --- a/tests/git/utils.ts +++ b/tests/git/utils.ts @@ -1,72 +1,265 @@ +import type { POJO } from '@'; +import type { CapabilityList } from '@/git/types'; +import type { Arbitrary } from 'fast-check'; import type { EncryptedFS } from 'encryptedfs'; -import type { ReadCommitResult } from 'isomorphic-git'; import path from 'path'; import git from 'isomorphic-git'; +import fc from 'fast-check'; +import * as gitUtils from '@/git/utils'; +import * as gitHttp from '@/git/http'; +import * as utils from '@/utils'; +/** + * Utility for quickly creating a git repo with history + */ async function createGitRepo({ 
 efs,
- packFile,
- indexFile,
+ dir,
+ gitdir,
+ author,
+ commits,
+ init = true,
}: {
 efs: EncryptedFS;
- packFile?: boolean;
- indexFile?: boolean;
-}): Promise {
- await git.init({
+ dir: string;
+ gitdir: string;
+ author: string;
+ commits: Array<{
+ message: string;
+ files: Array<{ name: string; contents: string }>;
+ }>;
+ init?: boolean;
+}) {
+ const gitDirs = {
 fs: efs,
- dir: '.',
- });
- await git.commit({
- fs: efs,
- dir: '.',
+ dir,
+ gitdir,
+ };
+ const authorDetails = {
 author: {
- name: 'TestCommitter',
+ name: author,
+ email: `${author}@test.com`,
 },
- message: 'Initial Commit',
- });
- await efs.promises.writeFile(
- path.join('.git', 'packed-refs'),
- '# pack-refs with: peeled fully-peeled sorted',
- );
- for (let i = 0; i < 10; i++) {
- const fp = i.toString();
- await efs.promises.writeFile(fp, 'secret ' + i.toString());
- await git.commit({
- fs: efs,
- dir: '.',
- author: {
- name: 'TestCommitter ' + i.toString(),
- },
- message: 'Commit ' + i.toString(),
+ committer: {
+ name: author,
+ email: `${author}@test.com`,
+ },
+ };
+ if (init) {
+ await git.init({
+ ...gitDirs,
 });
 }
- const log = await git.log({
- fs: efs,
- dir: '.',
- });
- if (packFile) {
- const pack = await git.packObjects({
- fs: efs,
- dir: '.',
- oids: [...log.map((item) => item.oid)],
- write: true,
+ for (const { message, files } of commits) {
+ await Promise.all(
+ files.map(({ name, contents }) =>
+ efs.promises.writeFile(path.join(gitDirs.dir, name), contents),
+ ),
+ );
+ await git.add({
+ ...gitDirs,
+ filepath: files.map(({ name }) => name),
 });
- if (indexFile) {
- await git.indexPack({
- fs: efs,
- dir: '.',
- filepath: path.join('.git', 'objects', 'pack', pack.filename),
- });
+ await git.commit({
+ ...gitDirs,
+ ...authorDetails,
+ message,
+ });
+ }
+}
+
+const objectsDirName = 'objects';
+const excludedDirs = ['pack', 'info'];
+
+/**
+ * Walks the filesystem to list out all git objects in the objects directory
+ * @param efs
+ * @param gitDir
+ */
+async function listGitObjects({
+ efs,
+ gitDir,
+}: {
+ efs: EncryptedFS;
+ gitDir: string;
+}) {
+ const objectsDirPath = path.join(gitDir, objectsDirName);
+ const objectSet: Set<string> = new Set();
+ const objectDirs = await efs.promises.readdir(objectsDirPath);
+ for (const objectDir of objectDirs) {
+ if (typeof objectDir !== 'string') {
+ utils.never('objectDir should be a string');
+ }
+ if (excludedDirs.includes(objectDir)) continue;
+ const objectIds = await efs.promises.readdir(
+ path.join(objectsDirPath, objectDir),
+ );
+ for (const objectId of objectIds) {
+ objectSet.add(objectDir + objectId);
 }
 }
- return log;
+ return [...objectSet];
}

-async function getPackID(efs: EncryptedFS): Promise {
- const pack = (
- await efs.promises.readdir(path.join('.git', 'objects', 'pack'))
- )[0];
- return (pack as string).substring(5, 45);
+type NegotiationTestData =
+ | {
+ type: 'want';
+ objectId: string;
+ capabilityList: CapabilityList;
+ }
+ | {
+ type: 'have';
+ objectId: string;
+ }
+ | {
+ type: 'SEPARATOR' | 'done' | 'none';
+ };
+
+/**
+ * This will generate a request line that would be sent by the git client when requesting objects.
+ * It is explicitly used to generate test data for the `parseRequestLine` code.
+ *
+ * @param data - type of line with data to be generated
+ * @param rest - Random buffer data to be appended to the end to simulate more lines in the stream.
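+ *
+ * For example (illustrative values, mirroring the cases handled below): a `want`
+ * entry for object id `aaaa…aaaa` with capability `side-band-64k` is framed as the
+ * pkt-line `want aaaa…aaaa side-band-64k\n`, while `done` is the fixed packet
+ * `0009done\n`; `rest` is appended raw after the framed line.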
+ */
+function generateGitNegotiationLine(data: NegotiationTestData, rest: Buffer) {
+ switch (data.type) {
+ case 'want': {
+ // Generate a `want` line that includes `want`, the `objectId` and capabilities
+ const line = Buffer.concat([
+ Buffer.from(data.type),
+ gitUtils.SPACE_BUFFER,
+ Buffer.from(data.objectId),
+ gitUtils.SPACE_BUFFER,
+ Buffer.from(data.capabilityList.join(gitUtils.SPACE_STRING)),
+ gitUtils.LINE_FEED_BUFFER,
+ ]);
+ return Buffer.concat([gitHttp.packetLineBuffer(line), rest]);
+ }
+ case 'have': {
+ // Generate a `have` line indicating an object that doesn't need to be sent
+ const line = Buffer.concat([
+ Buffer.from(data.type),
+ gitUtils.SPACE_BUFFER,
+ Buffer.from(data.objectId),
+ gitUtils.LINE_FEED_BUFFER,
+ ]);
+ return Buffer.concat([gitHttp.packetLineBuffer(line), rest]);
+ }
+ case 'SEPARATOR':
+ // Generate a `0000` flush packet
+ return Buffer.concat([Buffer.from('0000'), rest]);
+ case 'done':
+ // Generate a `done` packet.
+ return Buffer.concat([Buffer.from('0009done\n'), rest]);
+ case 'none':
+ // Generate an empty buffer to simulate the stream running out of data to process
+ return Buffer.alloc(0);
+ default:
+ utils.never();
+ }
+}
+
+/**
+ * Create a test request handler for use with `git.clone` and `git.pull`
+ */
+function request({
+ efs,
+ dir,
+ gitDir,
+}: {
+ efs: EncryptedFS;
+ dir: string;
+ gitDir: string;
+}) {
+ return async ({
+ url,
+ method = 'GET',
+ headers = {},
+ body = [Buffer.from('')],
+ }: {
+ url: string;
+ method: string;
+ headers: POJO;
+ body: Array<Buffer>;
+ }) => {
+ if (method === 'GET') {
+ // Send back the GET request info response
+ const advertiseRefGen = gitHttp.advertiseRefGenerator({
+ efs,
+ dir,
+ gitDir,
+ });
+
+ return {
+ url: url,
+ method: method,
+ body: advertiseRefGen,
+ headers: headers,
+ statusCode: 200,
+ statusMessage: 'OK',
+ };
+ } else if (method === 'POST') {
+ const packGen = gitHttp.generatePackRequest({
+ efs,
+ dir,
+ gitDir,
+ body,
+ });
+ return {
+ url: url,
+ method: method,
+ body: packGen,
+ headers: headers,
+ statusCode: 200,
+ statusMessage: 'OK',
+ };
+ } else {
+ utils.never();
+ }
+ };
+}
+
+// Generates a git objectId in the form of a 40-digit hex number
+const gitObjectIdArb = fc.hexaString({
+ maxLength: 40,
+ minLength: 40,
+});
+// Generates a list of capabilities, these are just random valid strings
+const gitCapabilityListArb = fc.array(
+ fc.stringOf(
+ fc.constantFrom(...`abcdefghijklmnopqrstuvwxyz-1234567890`.split('')),
+ { minLength: 5 },
+ ),
+ { size: 'small' },
+);
+// Generates git request data used for testing `parseRequestLine`
+const gitRequestDataArb = fc.oneof(
+ fc.record({
+ type: fc.constant('want') as Arbitrary<'want'>,
+ objectId: gitObjectIdArb,
+ capabilityList: gitCapabilityListArb,
+ }),
+ fc.record({
+ type: fc.constant('have') as Arbitrary<'have'>,
+ objectId: gitObjectIdArb,
+ }),
+ fc.record({
+ type: fc.constantFrom<'SEPARATOR' | 'done' | 'none'>(
+ 'SEPARATOR',
+ 'done',
+ 'none',
+ ),
+ }),
+);
+
+export {
+ createGitRepo,
+ listGitObjects,
+ generateGitNegotiationLine,
+ request,
+ gitObjectIdArb,
+ gitCapabilityListArb,
+ gitRequestDataArb,
+};
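For orientation, here is a minimal editorial sketch (not part of the diff) of how the two exported generator phases compose into a complete smart HTTP exchange. It assumes only the signatures shown in this diff, uses the test-style `@/git/http` import alias, and passes Node's `fs` for `efs` exactly as the tests above do:

```ts
import fs from 'fs';
import * as gitHttp from '@/git/http';

// Phase 1 (GET /info/refs): collect the ref advertisement pkt-lines.
async function infoRefs(dir: string, gitDir: string): Promise<Buffer> {
  const chunks: Array<Buffer> = [];
  for await (const chunk of gitHttp.advertiseRefGenerator({ efs: fs, dir, gitDir })) {
    chunks.push(chunk);
  }
  return Buffer.concat(chunks);
}

// Phase 2 (POST /git-upload-pack): feed the client's raw negotiation body in,
// collect the side-band multiplexed pack response out.
async function uploadPack(
  dir: string,
  gitDir: string,
  body: Array<Buffer>,
): Promise<Buffer> {
  const chunks: Array<Buffer> = [];
  for await (const chunk of gitHttp.generatePackRequest({ efs: fs, dir, gitDir, body })) {
    chunks.push(chunk);
  }
  return Buffer.concat(chunks);
}
```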