From 3f94dffca1f5d7d82324dca2a055f5416f065fe9 Mon Sep 17 00:00:00 2001 From: zeripath Date: Sat, 31 Oct 2020 19:30:23 +0000 Subject: [PATCH] When creating line diffs do not split within an html entity (#13357) (#13375) Backport #13357 * When creating line diffs do not split within an html entity Fix #13342 Signed-off-by: Andrew Thornton * Add test case Signed-off-by: Andrew Thornton * improve test Signed-off-by: Andrew Thornton Co-authored-by: techknowlogick Co-authored-by: techknowlogick --- services/gitdiff/gitdiff.go | 84 ++++++++++++++++++++++++++++++++ services/gitdiff/gitdiff_test.go | 30 ++++++++++++ 2 files changed, 114 insertions(+) diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 7acf8324c3..50e2f8cf1e 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -290,6 +290,9 @@ func init() { diffMatchPatch.DiffEditCost = 100 } +var unterminatedEntityRE = regexp.MustCompile(`&[^ ;]*$`) +var unstartedEntiyRE = regexp.MustCompile(`^[^ ;]*;`) + // GetComputedInlineDiffFor computes inline diff for the given line. func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) template.HTML { if setting.Git.DisableDiffHighlight { @@ -329,9 +332,90 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) tem diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, diff1[1:]), highlight.Code(diffSection.FileName, diff2[1:]), true) diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord) + + // Now we need to clean up the split entities + diffRecord = unsplitEntities(diffRecord) + diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord) + return diffToHTML(diffSection.FileName, diffRecord, diffLine.Type) } +// unsplitEntities looks for broken up html entities. It relies on records being presimplified and the data being passed in being valid html +func unsplitEntities(records []diffmatchpatch.Diff) []diffmatchpatch.Diff { + // Unsplitting entities is simple... + // + // Iterate through all be the last records because if we're the last record then there's nothing we can do + for i := 0; i+1 < len(records); i++ { + record := &records[i] + + // Look for an unterminated entity at the end of the line + unterminated := unterminatedEntityRE.FindString(record.Text) + if len(unterminated) == 0 { + continue + } + + switch record.Type { + case diffmatchpatch.DiffEqual: + // If we're an diff equal we want to give this unterminated entity to our next delete and insert + record.Text = record.Text[0 : len(record.Text)-len(unterminated)] + records[i+1].Text = unterminated + records[i+1].Text + + nextType := records[i+1].Type + + if nextType == diffmatchpatch.DiffEqual { + continue + } + + // if the next in line is a delete then we will want the thing after that to be an insert and so on. + oneAfterType := diffmatchpatch.DiffInsert + if nextType == diffmatchpatch.DiffInsert { + oneAfterType = diffmatchpatch.DiffDelete + } + + if i+2 < len(records) && records[i+2].Type == oneAfterType { + records[i+2].Text = unterminated + records[i+2].Text + } else { + records = append(records[:i+2], append([]diffmatchpatch.Diff{ + { + Type: oneAfterType, + Text: unterminated, + }}, records[i+2:]...)...) + } + case diffmatchpatch.DiffDelete: + fallthrough + case diffmatchpatch.DiffInsert: + // if we're an insert or delete we want to claim the terminal bit of the entity from the next equal in line + targetType := diffmatchpatch.DiffInsert + if record.Type == diffmatchpatch.DiffInsert { + targetType = diffmatchpatch.DiffDelete + } + next := &records[i+1] + if next.Type == diffmatchpatch.DiffEqual { + // if the next is an equal we need to snaffle the entity end off the start and add an delete/insert + if terminal := unstartedEntiyRE.FindString(next.Text); len(terminal) > 0 { + record.Text += terminal + next.Text = next.Text[len(terminal):] + records = append(records[:i+2], append([]diffmatchpatch.Diff{ + { + Type: targetType, + Text: unterminated, + }}, records[i+2:]...)...) + } + } else if next.Type == targetType { + // if the next is an insert we need to snaffle the entity end off the one after that and add it to both. + if i+2 < len(records) && records[i+2].Type == diffmatchpatch.DiffEqual { + if terminal := unstartedEntiyRE.FindString(records[i+2].Text); len(terminal) > 0 { + record.Text += terminal + next.Text += terminal + records[i+2].Text = records[i+2].Text[len(terminal):] + } + } + } + } + } + return records +} + // DiffFile represents a file diff. type DiffFile struct { Name string diff --git a/services/gitdiff/gitdiff_test.go b/services/gitdiff/gitdiff_test.go index e7eeca7004..0683997dab 100644 --- a/services/gitdiff/gitdiff_test.go +++ b/services/gitdiff/gitdiff_test.go @@ -15,6 +15,7 @@ import ( "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/setting" + "github.com/sergi/go-diff/diffmatchpatch" dmp "github.com/sergi/go-diff/diffmatchpatch" "github.com/stretchr/testify/assert" "gopkg.in/ini.v1" @@ -26,6 +27,35 @@ func assertEqual(t *testing.T, s1 string, s2 template.HTML) { } } +func TestUnsplitEntities(t *testing.T) { + left := "sh "useradd -u 111 jenkins"" + right := "sh 'useradd -u $(stat -c "%u" .gitignore) jenkins'" + diffRecord := diffMatchPatch.DiffMain(left, right, true) + diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord) + + // Now we need to clean up the split entities + diffRecord = unsplitEntities(diffRecord) + diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord) + + leftRecombined := "" + rightRecombined := "" + for _, record := range diffRecord { + assert.False(t, unterminatedEntityRE.MatchString(record.Text), "") + switch record.Type { + case diffmatchpatch.DiffDelete: + leftRecombined += record.Text + case diffmatchpatch.DiffInsert: + rightRecombined += record.Text + default: + leftRecombined += record.Text + rightRecombined += record.Text + } + } + + assert.EqualValues(t, left, leftRecombined) + assert.EqualValues(t, right, rightRecombined) +} + func TestDiffToHTML(t *testing.T) { setting.Cfg = ini.Empty() assertEqual(t, "foo bar biz", diffToHTML("", []dmp.Diff{