Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 55 additions & 8 deletions keepsorted/line_group.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ func groupLines(lines []string, metadata blockMetadata) []*lineGroup {
// Returns another boolean indicating whether the group should be ending
// after that line if so.
shouldAddToRegexDelimitedGroup := func(l string) (addToGroup bool, finishGroupAfter bool) {
if metadata.opts.GroupStartRegex != nil {
if metadata.opts.GroupStartRegex != nil {
// For GroupStartRegex, all non-regex-matching lines should be
// part of the group including prior lines.
return !matchesAnyRegex(l, metadata.opts.GroupStartRegex), false
Expand Down Expand Up @@ -349,7 +349,7 @@ func (cb *codeBlock) append(s string, opts blockOptions) {
cb.braceCounts = make(map[string]int)
}

// TODO(jfalgout): Does this need to handle runes more correctly?
// TODO: jfaer - Does this need to handle runes more correctly?
for i := 0; i < len(s); {
if cb.expectedQuote == "" {
// We do not appear to be inside a string literal.
Expand Down Expand Up @@ -419,8 +419,25 @@ func (lg *lineGroup) commentOnly() bool {
}

func (lg *lineGroup) regexTokens() []regexToken {
// TODO: jfaer - Should we match regexes on the original content?
regexMatches := lg.opts.matchRegexes(lg.internalJoinedLines())
var regexMatches []regexMatch
if len(lg.opts.ByRegex) == 0 {
// We apply other options on top of what the regex extracts, but if the user
// didn't set by_regex, we should fall back to the behavior they would've
// expected before we started supporting by_regex.
// Namely: We would apply the other options on top of the
// internalJoinedLines() instead of the raw content.
regexMatches = []regexMatch{{lg.internalJoinedLines()}}
} else {
regexMatches = lg.opts.matchRegexes(lg.regexJoinedLines())
// We still want to apply the joinLines transform so that we get
// "reasonable human" comparisons if the regex matches more than one line.
for _, match := range regexMatches {
for i, s := range match {
match[i] = joinLines(strings.Split(s, "\n"))
}
}
}

ret := make([]regexToken, len(regexMatches))
if lg.access.regexTokens == nil {
lg.access.regexTokens = make([]regexTokenAccessRecorder, len(regexMatches))
Expand Down Expand Up @@ -453,18 +470,29 @@ func (lg *lineGroup) regexTokens() []regexToken {
return ret
}

// internalJoinedLines calculates the same thing as joinedLines, except it
// doesn't record that it was used in the accessRecorder.
// internalJoinedLines attempts to concatenate all of this lineGroup's content
// the same way a reasonable human might.
//
// If the previous line ends with a "word" character and the current line starts
// with a "word" character, the two lines will be separated by a space.
// Otherwise, the lines are concatenated without any separation.
//
// Unlike joinedLines, this method does not record that it was used in the
// accessRecorder.
func (lg *lineGroup) internalJoinedLines() string {
if len(lg.lines) == 0 {
return joinLines(lg.lines)
}

func joinLines(lines []string) string {
if len(lines) == 0 {
return ""
}

endsWithWordChar := regexp.MustCompile(`\w$`)
startsWithWordChar := regexp.MustCompile(`^\w`)
var s strings.Builder
var last string
for _, l := range lg.lines {
for _, l := range lines {
l := strings.TrimLeftFunc(l, unicode.IsSpace)
if len(last) > 0 && len(l) > 0 && endsWithWordChar.MatchString(last) && startsWithWordChar.MatchString(l) {
s.WriteString(" ")
Expand All @@ -475,6 +503,25 @@ func (lg *lineGroup) internalJoinedLines() string {
return s.String()
}

// regexJoinedLines concatenates all of this lineGroup's content in a way that's
// friendlier to regexes than internalJoinedLines.
//
// Primarily, this method still strips leading whitespace but it uses a real
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this behavior might still be somewhat controversial. I can see one primary alternative: Only strip leading whitespace from the first line. Possibly use the first line's indent level to dedent any remaining line by that much instead of stripping all whitespace from following lines

// newline character to separate lines instead of the "word" character logic of
// internalJoinedLines.
func (lg *lineGroup) regexJoinedLines() string {
if len(lg.lines) == 0 {
return ""
}
lines := make([]string, len(lg.lines))
for i, l := range lg.lines {
lines[i] = strings.TrimLeftFunc(l, unicode.IsSpace)
}
return strings.Join(lines, "\n")
}

// joinedLines returns internalJoinedLines and records that it was called in the
// accessRecorder.
func (lg *lineGroup) joinedLines() string {
lg.access.joinedLines = true
return lg.internalJoinedLines()
Expand Down
17 changes: 14 additions & 3 deletions keepsorted/options_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ func (p *parser) popIntOrBool() (IntOrBool, error) {
func (ar *ByRegexOption) UnmarshalYAML(node *yaml.Node) error {
switch node.Tag {
case "!!str":
pat, err := regexp.Compile(node.Value)
pat, err := compileByRegex(node.Value)
if err != nil {
return err
}
Expand All @@ -144,7 +144,7 @@ func (ar *ByRegexOption) UnmarshalYAML(node *yaml.Node) error {
return fmt.Errorf("by_regex map item must have exactly one key-value pair, but got %d", len(m))
}
for pattern, template := range m {
pat, err := regexp.Compile(pattern)
pat, err := compileByRegex(pattern)
if err != nil {
return fmt.Errorf("invalid regex pattern %q: %w", pattern, err)
}
Expand Down Expand Up @@ -198,7 +198,7 @@ func (p *parser) popIntList() ([]int, error) {

func (p *parser) popByRegexOption() ([]ByRegexOption, error) {
return popListValue(p, func(s string) (ByRegexOption, error) {
pat, err := regexp.Compile(s)
pat, err := compileByRegex(s)
return ByRegexOption{Pattern: pat}, err
})
}
Expand Down Expand Up @@ -328,3 +328,14 @@ func (iter *runeIter) pop() (rune, bool) {
}
return ch, ok
}

func compileByRegex(re string) (*regexp.Regexp, error) {
if !strings.HasPrefix(re, "(?s)") && !strings.HasPrefix(re, "(?-s)") {
// The initial version of by_regex ran on top of lineGroup.joinedLines. This
// meant that users wrote regexes that didn't need to handle newlines.
// To minimize disruption, we automatically set dotall flag so that their
// regexes might still work after we start using real newlines.
re = "(?s)" + re
}
return regexp.Compile(re)
}
16 changes: 8 additions & 8 deletions keepsorted/options_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,17 +236,17 @@ func TestPopValue(t *testing.T) {
name: "Regex",

input: ".*",
want: []ByRegexOption{{regexp.MustCompile(".*"), nil}},
want: []ByRegexOption{{regexp.MustCompile("(?s).*"), nil}},
},
{
name: "MultipleRegex",

input: `[.*, abcd, '(?:efgh)ijkl']`,
allowYAMLList: true,
want: []ByRegexOption{
{regexp.MustCompile(".*"), nil},
{regexp.MustCompile("abcd"), nil},
{regexp.MustCompile("(?:efgh)ijkl"), nil},
{regexp.MustCompile("(?s).*"), nil},
{regexp.MustCompile("(?s)abcd"), nil},
{regexp.MustCompile("(?s)(?:efgh)ijkl"), nil},
},
},
{
Expand All @@ -255,10 +255,10 @@ func TestPopValue(t *testing.T) {
input: `[.*, Mon: 0, '\b(\d{2})/(\d{2})/(\d{4})\b': '${3}-${1}-${2}', "0: 1": 2]`,
allowYAMLList: true,
want: []ByRegexOption{
{regexp.MustCompile(".*"), nil},
{regexp.MustCompile("Mon"), &([]string{"0"})[0]},
{regexp.MustCompile(`\b(\d{2})/(\d{2})/(\d{4})\b`), &([]string{"${3}-${1}-${2}"})[0]},
{regexp.MustCompile(`0: 1`), &([]string{"2"})[0]},
{regexp.MustCompile("(?s).*"), nil},
{regexp.MustCompile("(?s)Mon"), &([]string{"0"})[0]},
{regexp.MustCompile(`(?s)\b(\d{2})/(\d{2})/(\d{4})\b`), &([]string{"${3}-${1}-${2}"})[0]},
{regexp.MustCompile(`(?s)0: 1`), &([]string{"2"})[0]},
},
},
{
Expand Down
11 changes: 6 additions & 5 deletions keepsorted/options_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,8 @@ func TestBlockOptions(t *testing.T) {
want: blockOptions{
AllowYAMLLists: true,
ByRegex: []ByRegexOption{
{regexp.MustCompile("(?:abcd)"), nil}, {regexp.MustCompile("efg.*"), nil},
{regexp.MustCompile("(?s)(?:abcd)"), nil},
{regexp.MustCompile("(?s)efg.*"), nil},
},
},
},
Expand All @@ -230,9 +231,9 @@ func TestBlockOptions(t *testing.T) {
want: blockOptions{
AllowYAMLLists: true,
ByRegex: []ByRegexOption{
{Pattern: regexp.MustCompile(`.*`)},
{Pattern: regexp.MustCompile(`(?s).*`)},
{
Pattern: regexp.MustCompile(`\b(\d{2})/(\d{2})/(\d{4})\b`),
Pattern: regexp.MustCompile(`(?s)\b(\d{2})/(\d{2})/(\d{4})\b`),
Template: &[]string{"${3}-${1}-${2}"}[0],
},
},
Expand All @@ -246,9 +247,9 @@ func TestBlockOptions(t *testing.T) {
want: blockOptions{
AllowYAMLLists: true,
ByRegex: []ByRegexOption{
{Pattern: regexp.MustCompile(`foo, bar`)},
{Pattern: regexp.MustCompile(`(?s)foo, bar`)},
{
Pattern: regexp.MustCompile(`\b(\d{2})/(\d{2})/(\d{4})\b`),
Pattern: regexp.MustCompile(`(?s)\b(\d{2})/(\d{2})/(\d{4})\b`),
Template: &[]string{"${3}-${1}-${2}"}[0],
},
},
Expand Down
Loading