From 8a0a02ed19238cb34ed22a3384034c88dd95bd9d Mon Sep 17 00:00:00 2001 From: Eric Amodio Date: Wed, 25 Oct 2023 02:38:41 -0400 Subject: [PATCH] Improves diff parsing accuracy, perf, & memory use - Replaces line array w/ Map for sparse layout & optimized line lookups - Line map is keyed on file line numbers for easy lookup usage --- src/annotations/gutterChangesAnnotationProvider.ts | 62 ++---- src/env/node/git/localGitProvider.ts | 32 ++- src/git/gitProvider.ts | 4 +- src/git/gitProviderService.ts | 4 +- src/git/models/diff.ts | 61 ++---- src/git/parsers/diffParser.ts | 237 ++++++++++----------- src/hovers/hovers.ts | 26 +-- src/plus/github/githubGitProvider.ts | 4 +- 8 files changed, 175 insertions(+), 255 deletions(-) diff --git a/src/annotations/gutterChangesAnnotationProvider.ts b/src/annotations/gutterChangesAnnotationProvider.ts index e92ed53..7423157 100644 --- a/src/annotations/gutterChangesAnnotationProvider.ts +++ b/src/annotations/gutterChangesAnnotationProvider.ts @@ -14,6 +14,7 @@ import { localChangesMessage } from '../hovers/hovers'; import { configuration } from '../system/configuration'; import { log } from '../system/decorators/log'; import { getLogScope } from '../system/logger.scope'; +import { getSettledValue } from '../system/promise'; import { maybeStopWatch } from '../system/stopwatch'; import type { GitDocumentState } from '../trackers/gitDocumentTracker'; import type { TrackedDocument } from '../trackers/trackedDocument'; @@ -141,7 +142,7 @@ export class GutterChangesAnnotationProvider extends AnnotationProviderBase(d?: T): d is T => Boolean(d)); + ) + .map(d => getSettledValue(d)) + .filter((d?: T): d is T => Boolean(d)); if (!diffs?.length) return false; using sw = maybeStopWatch(scope); @@ -187,14 +190,8 @@ export class GutterChangesAnnotationProvider extends AnnotationProviderBase hunk.current.count) { - state = 'removed'; - } else { - count--; - continue; - } - } else { - count--; - continue; - } - } else if (hunkLine.current?.state === 'added') { - if (hunkLine.previous?.state === 'removed') { - state = 'changed'; - } else { - state = 'added'; - } - } else if (hunkLine?.current.state === 'removed') { - // Check if there are more deleted lines than added lines show a deleted indicator - if (hunk.previous.count > hunk.current.count) { - state = 'removed'; - } else { - count--; - continue; - } - } else { - state = 'changed'; - } - - let decoration = decorationsMap.get(state); + let decoration = decorationsMap.get(hunkLine.state); if (decoration == null) { decoration = { - decorationType: (state === 'added' + decorationType: (hunkLine.state === 'added' ? Decorations.changesLineAddedAnnotation - : state === 'removed' + : hunkLine.state === 'removed' ? Decorations.changesLineDeletedAnnotation : Decorations.changesLineChangedAnnotation)!, rangesOrOptions: [{ range: range }], }; - decorationsMap.set(state, decoration); + decorationsMap.set(hunkLine.state, decoration); } else { decoration.rangesOrOptions.push({ range: range }); } @@ -303,7 +267,7 @@ export class GutterChangesAnnotationProvider extends AnnotationProviderBase hunk.current.count; + const hasMoreDeletedLines = /*hunk.state === 'changed' &&*/ hunk.previous.count > hunk.current.count; if ( position.line >= hunk.current.position.start - 1 && position.line <= hunk.current.position.end - (hasMoreDeletedLines ? 0 : 1) diff --git a/src/env/node/git/localGitProvider.ts b/src/env/node/git/localGitProvider.ts index b0bd7a6..9cfc9b2 100644 --- a/src/env/node/git/localGitProvider.ts +++ b/src/env/node/git/localGitProvider.ts @@ -65,7 +65,7 @@ import type { GitStashCommit } from '../../../git/models/commit'; import { GitCommit, GitCommitIdentity } from '../../../git/models/commit'; import { deletedOrMissing, uncommitted, uncommittedStaged } from '../../../git/models/constants'; import { GitContributor } from '../../../git/models/contributor'; -import type { GitDiff, GitDiffFile, GitDiffFilter, GitDiffHunkLine, GitDiffShortStat } from '../../../git/models/diff'; +import type { GitDiff, GitDiffFile, GitDiffFilter, GitDiffLine, GitDiffShortStat } from '../../../git/models/diff'; import type { GitFile, GitFileStatus } from '../../../git/models/file'; import { GitFileChange } from '../../../git/models/file'; import type { @@ -1435,9 +1435,7 @@ export class LocalGitProvider implements GitProvider, Disposable { Logger.debug(scope, `Cache miss: '${key}'`); - if (doc.state == null) { - doc.state = new GitDocumentState(); - } + doc.state ??= new GitDocumentState(); } const promise = this.getBlameCore(uri, doc, key, scope); @@ -1519,9 +1517,7 @@ export class LocalGitProvider implements GitProvider, Disposable { Logger.debug(scope, `Cache miss: ${key}`); - if (doc.state == null) { - doc.state = new GitDocumentState(); - } + doc.state ??= new GitDocumentState(); } const promise = this.getBlameContentsCore(uri, contents, doc, key, scope); @@ -2738,9 +2734,7 @@ export class LocalGitProvider implements GitProvider, Disposable { Logger.debug(scope, `Cache miss: '${key}'`); - if (doc.state == null) { - doc.state = new GitDocumentState(); - } + doc.state ??= new GitDocumentState(); } const encoding = await getEncoding(uri); @@ -2827,9 +2821,7 @@ export class LocalGitProvider implements GitProvider, Disposable { Logger.debug(scope, `Cache miss: ${key}`); - if (doc.state == null) { - doc.state = new GitDocumentState(); - } + doc.state ??= new GitDocumentState(); } const encoding = await getEncoding(uri); @@ -2902,7 +2894,7 @@ export class LocalGitProvider implements GitProvider, Disposable { editorLine: number, // 0-based, Git is 1-based ref1: string | undefined, ref2?: string, - ): Promise { + ): Promise { try { const diff = await this.getDiffForFile(uri, ref1, ref2); if (diff == null) return undefined; @@ -2911,7 +2903,13 @@ export class LocalGitProvider implements GitProvider, Disposable { const hunk = diff.hunks.find(c => c.current.position.start <= line && c.current.position.end >= line); if (hunk == null) return undefined; - return hunk.lines[line - Math.min(hunk.current.position.start, hunk.previous.position.start)]; + const hunkLine = hunk.lines.get(line); + if (hunkLine == null) return undefined; + + return { + hunk: hunk, + line: hunkLine, + }; } catch (ex) { return undefined; } @@ -3437,9 +3435,7 @@ export class LocalGitProvider implements GitProvider, Disposable { Logger.debug(scope, `Cache miss: '${key}'`); - if (doc.state == null) { - doc.state = new GitDocumentState(); - } + doc.state ??= new GitDocumentState(); } const promise = this.getLogForFileCore(repoPath, relativePath, opts, doc, key, scope); diff --git a/src/git/gitProvider.ts b/src/git/gitProvider.ts index abafd19..7b17950 100644 --- a/src/git/gitProvider.ts +++ b/src/git/gitProvider.ts @@ -7,7 +7,7 @@ import type { GitBlame, GitBlameLine, GitBlameLines } from './models/blame'; import type { BranchSortOptions, GitBranch } from './models/branch'; import type { GitCommit } from './models/commit'; import type { GitContributor } from './models/contributor'; -import type { GitDiff, GitDiffFile, GitDiffFilter, GitDiffHunkLine, GitDiffShortStat } from './models/diff'; +import type { GitDiff, GitDiffFile, GitDiffFilter, GitDiffLine, GitDiffShortStat } from './models/diff'; import type { GitFile } from './models/file'; import type { GitGraph } from './models/graph'; import type { GitLog } from './models/log'; @@ -304,7 +304,7 @@ export interface GitProvider extends Disposable { editorLine: number, ref1: string | undefined, ref2?: string, - ): Promise; + ): Promise; getDiffStatus( repoPath: string, ref1?: string, diff --git a/src/git/gitProviderService.ts b/src/git/gitProviderService.ts index bcc0848..703ac38 100644 --- a/src/git/gitProviderService.ts +++ b/src/git/gitProviderService.ts @@ -57,7 +57,7 @@ import type { BranchSortOptions, GitBranch } from './models/branch'; import { GitCommit, GitCommitIdentity } from './models/commit'; import { deletedOrMissing, uncommitted, uncommittedStaged } from './models/constants'; import type { GitContributor } from './models/contributor'; -import type { GitDiff, GitDiffFile, GitDiffFilter, GitDiffHunkLine, GitDiffShortStat } from './models/diff'; +import type { GitDiff, GitDiffFile, GitDiffFilter, GitDiffLine, GitDiffShortStat } from './models/diff'; import type { GitFile } from './models/file'; import type { GitGraph } from './models/graph'; import type { SearchedIssue } from './models/issue'; @@ -1795,7 +1795,7 @@ export class GitProviderService implements Disposable { editorLine: number, ref1: string | undefined, ref2?: string, - ): Promise { + ): Promise { const { provider } = this.getProvider(uri); return provider.getDiffForLine(uri, editorLine, ref1, ref2); } diff --git a/src/git/models/diff.ts b/src/git/models/diff.ts index fa770c8..b05751f 100644 --- a/src/git/models/diff.ts +++ b/src/git/models/diff.ts @@ -1,49 +1,25 @@ -import { parseGitDiffHunk } from '../parsers/diffParser'; - -export interface GitDiffLine { - line: string; - state: 'added' | 'removed' | 'unchanged'; +export interface GitDiff { + readonly baseSha: string; + readonly contents: string; } export interface GitDiffHunkLine { - hunk: GitDiffHunk; - current: GitDiffLine | undefined; - previous: GitDiffLine | undefined; + current: string | undefined; + previous: string | undefined; + state: 'added' | 'changed' | 'removed' | 'unchanged'; } -export class GitDiffHunk { - constructor( - public readonly contents: string, - public current: { - count: number; - position: { start: number; end: number }; - }, - public previous: { - count: number; - position: { start: number; end: number }; - }, - ) {} - - get lines(): GitDiffHunkLine[] { - return this.parseHunk().lines; - } - - get state(): 'added' | 'changed' | 'removed' { - return this.parseHunk().state; - } - - private parsedHunk: { lines: GitDiffHunkLine[]; state: 'added' | 'changed' | 'removed' } | undefined; - private parseHunk() { - if (this.parsedHunk == null) { - this.parsedHunk = parseGitDiffHunk(this); - } - return this.parsedHunk; - } -} - -export interface GitDiff { - readonly baseSha: string; +export interface GitDiffHunk { readonly contents: string; + readonly current: { + readonly count: number; + readonly position: { readonly start: number; readonly end: number }; + }; + readonly previous: { + readonly count: number; + readonly position: { readonly start: number; readonly end: number }; + }; + readonly lines: Map; } export interface GitDiffFile { @@ -51,6 +27,11 @@ export interface GitDiffFile { readonly contents?: string; } +export interface GitDiffLine { + readonly hunk: GitDiffHunk; + readonly line: GitDiffHunkLine; +} + export interface GitDiffShortStat { readonly additions: number; readonly deletions: number; diff --git a/src/git/parsers/diffParser.ts b/src/git/parsers/diffParser.ts index 1238414..b9725ef 100644 --- a/src/git/parsers/diffParser.ts +++ b/src/git/parsers/diffParser.ts @@ -1,152 +1,131 @@ import { maybeStopWatch } from '../../system/stopwatch'; -import { getLines } from '../../system/string'; -import type { GitDiffFile, GitDiffHunkLine, GitDiffLine, GitDiffShortStat } from '../models/diff'; -import { GitDiffHunk } from '../models/diff'; +import type { GitDiffFile, GitDiffHunk, GitDiffHunkLine, GitDiffShortStat } from '../models/diff'; import type { GitFile, GitFileStatus } from '../models/file'; const shortStatDiffRegex = /(\d+)\s+files? changed(?:,\s+(\d+)\s+insertions?\(\+\))?(?:,\s+(\d+)\s+deletions?\(-\))?/; -const unifiedDiffRegex = /^@@ -([\d]+)(?:,([\d]+))? \+([\d]+)(?:,([\d]+))? @@(?:.*?)\n([\s\S]*?)(?=^@@)/gm; -export function parseGitFileDiff(data: string, includeContents: boolean = false): GitDiffFile | undefined { +function parseHunkHeaderPart(headerPart: string) { + const [startS, countS] = headerPart.split(','); + const count = Number(countS) || 1; + const start = Number(startS); + return { count: count, position: { start: start, end: start + count - 1 } }; +} + +export function parseGitFileDiff(data: string, includeContents = false): GitDiffFile | undefined { using sw = maybeStopWatch('Git.parseFileDiff', { log: false, logLevel: 'debug' }); if (!data) return undefined; const hunks: GitDiffHunk[] = []; - let previousStart; - let previousCount; - let currentStart; - let currentCount; - let hunk; - - let match; - do { - match = unifiedDiffRegex.exec(`${data}\n@@`); - if (match == null) break; - - [, previousStart, previousCount, currentStart, currentCount, hunk] = match; - - previousCount = Number(previousCount) || 0; - previousStart = Number(previousStart) || 0; - currentCount = Number(currentCount) || 0; - currentStart = Number(currentStart) || 0; - - hunks.push( - new GitDiffHunk( - // Stops excessive memory usage -- https://bugs.chromium.org/p/v8/issues/detail?id=2869 - ` ${hunk}`.substr(1), - { - count: currentCount === 0 ? 1 : currentCount, - position: { - start: currentStart, - end: currentStart + (currentCount > 0 ? currentCount - 1 : 0), - }, - }, - { - count: previousCount === 0 ? 1 : previousCount, - position: { - start: previousStart, - end: previousStart + (previousCount > 0 ? previousCount - 1 : 0), - }, - }, - ), - ); - } while (true); - - sw?.stop({ suffix: ` parsed ${hunks.length} hunks` }); - - if (!hunks.length) return undefined; - - const diff: GitDiffFile = { - contents: includeContents ? data : undefined, - hunks: hunks, - }; - return diff; -} - -export function parseGitDiffHunk(hunk: GitDiffHunk): { - lines: GitDiffHunkLine[]; - state: 'added' | 'changed' | 'removed'; -} { - using sw = maybeStopWatch('Git.parseDiffHunk', { log: false, logLevel: 'debug' }); - - const currentStart = hunk.current.position.start; - const previousStart = hunk.previous.position.start; - - const currentLines: (GitDiffLine | undefined)[] = - currentStart > previousStart - ? new Array(currentStart - previousStart).fill(undefined, 0, currentStart - previousStart) - : []; - const previousLines: (GitDiffLine | undefined)[] = - previousStart > currentStart - ? new Array(previousStart - currentStart).fill(undefined, 0, previousStart - currentStart) - : []; - - let hasAddedOrChanged; - let hasRemoved; - - let removed = 0; - for (const l of getLines(hunk.contents)) { - switch (l[0]) { - case '+': - hasAddedOrChanged = true; - currentLines.push({ - line: ` ${l.substring(1)}`, - state: 'added', - }); - - if (removed > 0) { - removed--; - } else { - previousLines.push(undefined); - } - - break; + const lines = data.split('\n'); - case '-': - hasRemoved = true; - removed++; - - previousLines.push({ - line: ` ${l.substring(1)}`, - state: 'removed', - }); - - break; - - default: - while (removed > 0) { - removed--; - currentLines.push(undefined); - } - - currentLines.push({ line: l, state: 'unchanged' }); - previousLines.push({ line: l, state: 'unchanged' }); - - break; + // Skip header + let i = -1; + while (i < lines.length) { + if (lines[++i].startsWith('@@')) { + break; } } - while (removed > 0) { - removed--; - currentLines.push(undefined); - } + // Parse hunks + let line; + while (i < lines.length) { + line = lines[i]; + if (!line.startsWith('@@')) continue; + + const content = line.slice(4, -3); + const [previousHeaderPart, currentHeaderPart] = content.split(' +'); + + const current = parseHunkHeaderPart(currentHeaderPart); + const previous = parseHunkHeaderPart(previousHeaderPart); + + const hunkLines = new Map(); + let fileLineNumber = current.position.start; + + line = lines[++i]; + const contentStartLine = i; + + // Parse hunks lines + while (i < lines.length && !line.startsWith('@@')) { + switch (line[0]) { + // deleted + case '-': { + let deletedLineNumber = fileLineNumber; + while (line?.startsWith('-')) { + hunkLines.set(deletedLineNumber++, { + current: undefined, + previous: line.slice(1), + state: 'removed', + }); + line = lines[++i]; + } + + if (line?.startsWith('+')) { + let addedLineNumber = fileLineNumber; + while (line?.startsWith('+')) { + const hunkLine = hunkLines.get(addedLineNumber); + if (hunkLine != null) { + hunkLine.current = line.slice(1); + hunkLine.state = 'changed'; + } else { + hunkLines.set(addedLineNumber, { + current: line.slice(1), + previous: undefined, + state: 'added', + }); + } + addedLineNumber++; + line = lines[++i]; + } + fileLineNumber = addedLineNumber; + } else { + fileLineNumber = deletedLineNumber; + } + break; + } + // added + case '+': + hunkLines.set(fileLineNumber++, { + current: line.slice(1), + previous: undefined, + state: 'added', + }); + + line = lines[++i]; + break; + + // unchanged (context) + case ' ': + hunkLines.set(fileLineNumber++, { + current: line.slice(1), + previous: line.slice(1), + state: 'unchanged', + }); + + line = lines[++i]; + break; + + default: + line = lines[++i]; + break; + } + } - const hunkLines: GitDiffHunkLine[] = []; + const hunk: GitDiffHunk = { + contents: `${lines.slice(contentStartLine, i).join('\n')}\n`, + current: current, + previous: previous, + lines: hunkLines, + }; - for (let i = 0; i < Math.max(currentLines.length, previousLines.length); i++) { - hunkLines.push({ - hunk: hunk, - current: currentLines[i], - previous: previousLines[i], - }); + hunks.push(hunk); } - sw?.stop({ suffix: ` parsed ${hunkLines.length} hunk lines` }); + sw?.stop({ suffix: ` parsed ${hunks.length} hunks` }); return { - lines: hunkLines, - state: hasAddedOrChanged && hasRemoved ? 'changed' : hasAddedOrChanged ? 'added' : 'removed', + contents: includeContents ? data : undefined, + hunks: hunks, }; } diff --git a/src/hovers/hovers.ts b/src/hovers/hovers.ts index 625d0b1..a05f47a 100644 --- a/src/hovers/hovers.ts +++ b/src/hovers/hovers.ts @@ -9,7 +9,7 @@ import { CommitFormatter } from '../git/formatters/commitFormatter'; import { GitUri } from '../git/gitUri'; import type { GitCommit } from '../git/models/commit'; import { uncommittedStaged } from '../git/models/constants'; -import type { GitDiffHunk, GitDiffHunkLine } from '../git/models/diff'; +import type { GitDiffHunk, GitDiffLine } from '../git/models/diff'; import type { PullRequest } from '../git/models/pullRequest'; import { isUncommittedStaged, shortenRevision } from '../git/models/reference'; import type { GitRemote } from '../git/models/remote'; @@ -32,6 +32,9 @@ export async function changesMessage( async function getDiff() { if (commit.file == null) return undefined; + const line = editorLine + 1; + const commitLine = commit.lines.find(l => l.line === line) ?? commit.lines[0]; + // TODO: Figure out how to optimize this let ref; if (commit.isUncommitted) { @@ -39,16 +42,13 @@ export async function changesMessage( ref = documentRef; } } else { - previousSha = await commit.getPreviousSha(); + previousSha = commitLine.previousSha; ref = previousSha; if (ref == null) { return `\`\`\`diff\n+ ${document.lineAt(editorLine).text}\n\`\`\``; } } - const line = editorLine + 1; - const commitLine = commit.lines.find(l => l.line === line) ?? commit.lines[0]; - let originalPath = commit.file.originalPath; if (originalPath == null) { if (uri.fsPath !== commit.file.uri.fsPath) { @@ -58,14 +58,14 @@ export async function changesMessage( editorLine = commitLine.line - 1; // TODO: Doesn't work with dirty files -- pass in editor? or contents? - let hunkLine = await container.git.getDiffForLine(uri, editorLine, ref, documentRef); + let lineDiff = await container.git.getDiffForLine(uri, editorLine, ref, documentRef); // If we didn't find a diff & ref is undefined (meaning uncommitted), check for a staged diff - if (hunkLine == null && ref == null && documentRef !== uncommittedStaged) { - hunkLine = await container.git.getDiffForLine(uri, editorLine, undefined, uncommittedStaged); + if (lineDiff == null && ref == null && documentRef !== uncommittedStaged) { + lineDiff = await container.git.getDiffForLine(uri, editorLine, undefined, uncommittedStaged); } - return hunkLine != null ? getDiffFromHunkLine(hunkLine) : undefined; + return lineDiff != null ? getDiffFromLine(lineDiff) : undefined; } const diff = await getDiff(); @@ -297,12 +297,12 @@ function getDiffFromHunk(hunk: GitDiffHunk): string { return `\`\`\`diff\n${hunk.contents.trim()}\n\`\`\``; } -function getDiffFromHunkLine(hunkLine: GitDiffHunkLine, diffStyle?: 'line' | 'hunk'): string { +function getDiffFromLine(lineDiff: GitDiffLine, diffStyle?: 'line' | 'hunk'): string { if (diffStyle === 'hunk' || (diffStyle == null && configuration.get('hovers.changesDiff') === 'hunk')) { - return getDiffFromHunk(hunkLine.hunk); + return getDiffFromHunk(lineDiff.hunk); } - return `\`\`\`diff${hunkLine.previous == null ? '' : `\n- ${hunkLine.previous.line.trim()}`}${ - hunkLine.current == null ? '' : `\n+ ${hunkLine.current.line.trim()}` + return `\`\`\`diff${lineDiff.line.previous == null ? '' : `\n- ${lineDiff.line.previous.trim()}`}${ + lineDiff.line.current == null ? '' : `\n+ ${lineDiff.line.current.trim()}` }\n\`\`\``; } diff --git a/src/plus/github/githubGitProvider.ts b/src/plus/github/githubGitProvider.ts index a4d19c6..cc12344 100644 --- a/src/plus/github/githubGitProvider.ts +++ b/src/plus/github/githubGitProvider.ts @@ -43,7 +43,7 @@ import type { GitCommitLine } from '../../git/models/commit'; import { getChangedFilesCount, GitCommit, GitCommitIdentity } from '../../git/models/commit'; import { deletedOrMissing, uncommitted } from '../../git/models/constants'; import { GitContributor } from '../../git/models/contributor'; -import type { GitDiffFile, GitDiffFilter, GitDiffHunkLine, GitDiffShortStat } from '../../git/models/diff'; +import type { GitDiffFile, GitDiffFilter, GitDiffLine, GitDiffShortStat } from '../../git/models/diff'; import type { GitFile } from '../../git/models/file'; import { GitFileChange, GitFileIndexStatus } from '../../git/models/file'; import type { @@ -1736,7 +1736,7 @@ export class GitHubGitProvider implements GitProvider, Disposable { _editorLine: number, // 0-based, Git is 1-based _ref1: string | undefined, _ref2?: string, - ): Promise { + ): Promise { return undefined; }