From 8e0b1e3d013ee379335bc89801525a861047929d Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Tue, 13 Jan 2026 17:08:11 -0800 Subject: [PATCH 1/2] userdiff: tighten word-diff test case of the scheme driver The scheme driver separates identifiers only at parentheses of all sorts and whitespace, except that vertical bars act as brackets that enclose an identifier. The test case attempts to demonstrate the vertical bars with a change from 'some-text' to '|a greeting|'. However, this misses the goal because the same word coloring would be applied if '|a greeting|' were parsed as two words. Have an identifier between vertical bars with a space in both the pre- and the post-image and change only one side of the space to show that the single word exists between the vertical bars. Also add cases that change parentheses of all kinds in a sequence of parentheses to show that they are their own word each. Signed-off-by: Johannes Sixt Signed-off-by: Scott L. Burson --- t/t4034/scheme/expect | 5 +++-- t/t4034/scheme/post | 1 + t/t4034/scheme/pre | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/t/t4034/scheme/expect b/t/t4034/scheme/expect index 496cd5de8c9af3..138abe9f56b38f 100644 --- a/t/t4034/scheme/expect +++ b/t/t4034/scheme/expect @@ -2,10 +2,11 @@ index 74b6605..63b6ac4 100644 --- a/pre +++ b/post -@@ -1,6 +1,6 @@ +@@ -1,7 +1,7 @@ (define (myfunc a bmy-func first second) ; This is a really(moderately) cool function. (this\placethat\place (+ 3 4)) - (define some-text|a greeting| "hello") + (define |the greeting||a greeting| "hello") + ({}(([](func-n)[])){}) (let ((c (+ a badd1 first))) (format "one more than the total is %d" (add1+ c second)))) diff --git a/t/t4034/scheme/post b/t/t4034/scheme/post index 63b6ac4f8754d8..0e3bab101da03e 100644 --- a/t/t4034/scheme/post +++ b/t/t4034/scheme/post @@ -2,5 +2,6 @@ ; This is a (moderately) cool function. (that\place (+ 3 4)) (define |a greeting| "hello") + ({(([(func-n)]))}) (let ((c (add1 first))) (format "one more than the total is %d" (+ c second)))) diff --git a/t/t4034/scheme/pre b/t/t4034/scheme/pre index 74b66053574b67..03d77c7c430e07 100644 --- a/t/t4034/scheme/pre +++ b/t/t4034/scheme/pre @@ -1,6 +1,7 @@ (define (myfunc a b) ; This is a really cool function. (this\place (+ 3 4)) - (define some-text "hello") + (define |the greeting| "hello") + ({}(([](func-n)[])){}) (let ((c (+ a b))) (format "one more than the total is %d" (add1 c)))) From 0bd51e02ba1aec92f2149a3c870af2dd1fc200b4 Mon Sep 17 00:00:00 2001 From: "Scott L. Burson" Date: Tue, 13 Jan 2026 22:13:01 -0800 Subject: [PATCH 2/2] userdiff: extend Scheme support to cover other Lisp dialects Common Lisp has top-level forms, such as 'defun' and 'defmacro', that are not matched by the current Scheme pattern. Also, it is more common in CL, when defining user macros intended as top-level forms, to prefix their names with "def" instead of "define"; such forms are also not matched. And some top-level forms don't even begin with "def". On the other hand, it is an established formatting convention in the Lisp community that only top-level forms start at the left margin. So matching any unindented line starting with an open parenthesis is an acceptable heuristic; false positives will be rare. However, there are also cases where notionally top-level forms are grouped together within some containing form. At least in the Common Lisp community, it is conventional to indent these by two spaces, or sometimes one. But matching just an open parenthesis indented by two spaces would be too broad; so the pattern added by this commit requires an indented form to start with "(def". It is believed that this strikes a good balance between potential false positives and false negatives. Signed-off-by: Scott L. Burson --- Documentation/gitattributes.adoc | 3 ++- t/t4018/scheme-lisp-defun-a | 4 ++++ t/t4018/scheme-lisp-defun-b | 4 ++++ t/t4018/scheme-lisp-eval-when | 4 ++++ t/t4018/{scheme-module => scheme-module-a} | 0 t/t4018/scheme-module-b | 6 ++++++ t/t4034/scheme/expect | 2 +- t/t4034/scheme/post | 2 +- t/t4034/scheme/pre | 2 +- userdiff.c | 22 ++++++++++++++++------ 10 files changed, 39 insertions(+), 10 deletions(-) create mode 100644 t/t4018/scheme-lisp-defun-a create mode 100644 t/t4018/scheme-lisp-defun-b create mode 100644 t/t4018/scheme-lisp-eval-when rename t/t4018/{scheme-module => scheme-module-a} (100%) create mode 100644 t/t4018/scheme-module-b diff --git a/Documentation/gitattributes.adoc b/Documentation/gitattributes.adoc index f20041a323d174..bd76167a45eb71 100644 --- a/Documentation/gitattributes.adoc +++ b/Documentation/gitattributes.adoc @@ -911,7 +911,8 @@ patterns are available: - `rust` suitable for source code in the Rust language. -- `scheme` suitable for source code in the Scheme language. +- `scheme` suitable for source code in most Lisp dialects, + including Scheme, Emacs Lisp, Common Lisp, and Clojure. - `tex` suitable for source code for LaTeX documents. diff --git a/t/t4018/scheme-lisp-defun-a b/t/t4018/scheme-lisp-defun-a new file mode 100644 index 00000000000000..c3c750f76d7b07 --- /dev/null +++ b/t/t4018/scheme-lisp-defun-a @@ -0,0 +1,4 @@ +(defun some-func (x y z) RIGHT + (let ((a x) + (b y)) + (ChangeMe a b))) diff --git a/t/t4018/scheme-lisp-defun-b b/t/t4018/scheme-lisp-defun-b new file mode 100644 index 00000000000000..21be305968bf6b --- /dev/null +++ b/t/t4018/scheme-lisp-defun-b @@ -0,0 +1,4 @@ +(macrolet ((foo (x) `(bar ,x))) + (defun mumble (x) ; RIGHT + (when (> x 0) + (foo x)))) ; ChangeMe diff --git a/t/t4018/scheme-lisp-eval-when b/t/t4018/scheme-lisp-eval-when new file mode 100644 index 00000000000000..5d941d7e0edda2 --- /dev/null +++ b/t/t4018/scheme-lisp-eval-when @@ -0,0 +1,4 @@ +(eval-when (:compile-toplevel :load-toplevel :execute) ; RIGHT + (set-macro-character #\? + (lambda (stream char) + `(make-pattern-variable ,(read stream))))) ; ChangeMe diff --git a/t/t4018/scheme-module b/t/t4018/scheme-module-a similarity index 100% rename from t/t4018/scheme-module rename to t/t4018/scheme-module-a diff --git a/t/t4018/scheme-module-b b/t/t4018/scheme-module-b new file mode 100644 index 00000000000000..77bc0c5eff4775 --- /dev/null +++ b/t/t4018/scheme-module-b @@ -0,0 +1,6 @@ +(module A + (export with-display-exception) + (extern (display-exception display-exception)) + (def (with-display-exception thunk) RIGHT + (with-catch (lambda (e) (display-exception e (current-error-port)) e) + thunk ChangeMe))) diff --git a/t/t4034/scheme/expect b/t/t4034/scheme/expect index 138abe9f56b38f..fb7f2616fea547 100644 --- a/t/t4034/scheme/expect +++ b/t/t4034/scheme/expect @@ -6,7 +6,7 @@ (define (myfunc a bmy-func first second) ; This is a really(moderately) cool function. (this\placethat\place (+ 3 4)) - (define |the greeting||a greeting| "hello") + (define |the \| \greeting||a \greeting| |hello there|) ({}(([](func-n)[])){}) (let ((c (+ a badd1 first))) (format "one more than the total is %d" (add1+ c second)))) diff --git a/t/t4034/scheme/post b/t/t4034/scheme/post index 0e3bab101da03e..450cc234f75aea 100644 --- a/t/t4034/scheme/post +++ b/t/t4034/scheme/post @@ -1,7 +1,7 @@ (define (my-func first second) ; This is a (moderately) cool function. (that\place (+ 3 4)) - (define |a greeting| "hello") + (define |a \greeting| |hello there|) ({(([(func-n)]))}) (let ((c (add1 first))) (format "one more than the total is %d" (+ c second)))) diff --git a/t/t4034/scheme/pre b/t/t4034/scheme/pre index 03d77c7c430e07..e16ee7584946e4 100644 --- a/t/t4034/scheme/pre +++ b/t/t4034/scheme/pre @@ -1,7 +1,7 @@ (define (myfunc a b) ; This is a really cool function. (this\place (+ 3 4)) - (define |the greeting| "hello") + (define |the \| \greeting| |hello there|) ({}(([](func-n)[])){}) (let ((c (+ a b))) (format "one more than the total is %d" (add1 c)))) diff --git a/userdiff.c b/userdiff.c index fe710a68bfdfa6..b5412e6bc3ecd3 100644 --- a/userdiff.c +++ b/userdiff.c @@ -344,14 +344,24 @@ PATTERNS("rust", "|[0-9][0-9_a-fA-Fiosuxz]*(\\.([0-9]*[eE][+-]?)?[0-9_fF]*)?" "|[-+*\\/<>%&^|=!:]=|<<=?|>>=?|&&|\\|\\||->|=>|\\.{2}=|\\.{3}|::"), PATTERNS("scheme", - "^[\t ]*(\\(((define|def(struct|syntax|class|method|rules|record|proto|alias)?)[-*/ \t]|(library|module|struct|class)[*+ \t]).*)$", /* - * R7RS valid identifiers include any sequence enclosed - * within vertical lines having no backslashes + * An unindented opening parenthesis identifies a top-level + * expression in all Lisp dialects. */ - "\\|([^\\\\]*)\\|" - /* All other words should be delimited by spaces or parentheses */ - "|([^][)(}{[ \t])+"), + "^(\\(.*)$\n" + /* For Scheme: a possibly indented left paren followed by a keyword. */ + "^[\t ]*(\\(((define|def(struct|syntax|class|method|rules|record|proto|alias)?)[-*/ \t]|(library|module|struct|class)[*+ \t]).*)$\n" + /* + * For all Lisp dialects: a slightly indented line starting with "(def". + */ + "^ ?(\\([Dd][Ee][Ff].*)$", + /* + * The union of R7RS and Common Lisp symbol syntax: allows arbitrary + * strings between vertical bars, including any escaped characters. + */ + "\\|([^|\\\\]|\\\\.)*\\|" + /* All other words should be delimited by spaces or parentheses. */ + "|([^][)(}{ \t])+"), PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$", "\\\\[a-zA-Z@]+|\\\\.|([a-zA-Z0-9]|[^\x01-\x7f])+"), { .name = "default", .binary = -1 },