Skip to content

Commit 1f4da95

Browse files
rmuirdweiss
andauthored
improve 'javadoc-style-license-header' and 'license-after-package' (#14849)
these two checks are currently implemented with regex rules looking for comments, then calling into rat API to check if it is a license. It means parsing every java file with rat just for this purpose, and the regexes are not perfect. The new rules are very simple but find some inconsistencies that weren't found before: they aren't stricter, the previous code just misses some. We now only call into rat here for xml files, separately we should decide if that is worth it. If not, we can reduce the gradle code further and avoid challenges with Rat API on upgrades. To me, checking this for the 29 XML files is not worth the hassle of installing additional C library / configuring `customLanguages` to check the xml with this tool, I add xml parser in my own personal config but I think its too much of a hassle for anyone else / CI. * strengthen rule with wildcard node This allows stuff in between such as import statements, and finds more inconsistencies: fix them. The TokenManager associated with the StandardSyntaxParser also has issues here, it has a package statement and import statement before the license. * add tests for additional cases now found and fixed * javadoc-license: add autofix * Correct license location in StandardSyntaxParser. * This adds ASL headers to antlr-generated code and removes antlr generated sources from generated family in rat checks. --------- Co-authored-by: Dawid Weiss <[email protected]>
1 parent c075548 commit 1f4da95

38 files changed

+376
-143
lines changed

build-tools/build-infra/src/main/groovy/lucene.regenerate.antlr.gradle

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,27 @@ configure(project(":lucene:expressions")) {
6969
generatedFiles.each { file ->
7070
modifyFile(file, { text ->
7171
text = text.replaceAll("public ((interface|class) Javascript\\w+)", "\$1")
72-
text = text.replaceAll("// Generated from .*", "// ANTLR GENERATED CODE: DO NOT EDIT")
72+
text = text.replaceAll("// Generated from .*",
73+
"""
74+
/*
75+
* Licensed to the Apache Software Foundation (ASF) under one or more
76+
* contributor license agreements. See the NOTICE file distributed with
77+
* this work for additional information regarding copyright ownership.
78+
* The ASF licenses this file to You under the Apache License, Version 2.0
79+
* (the "License"); you may not use this file except in compliance with
80+
* the License. You may obtain a copy of the License at
81+
*
82+
* http://www.apache.org/licenses/LICENSE-2.0
83+
*
84+
* Unless required by applicable law or agreed to in writing, software
85+
* distributed under the License is distributed on an "AS IS" BASIS,
86+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
87+
* See the License for the specific language governing permissions and
88+
* limitations under the License.
89+
*/
90+
91+
// ANTLR GENERATED CODE: DO NOT EDIT.
92+
""")
7393
return text
7494
})
7595
}

build-tools/build-infra/src/main/groovy/lucene.validation.rat-sources.gradle

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,8 +217,6 @@ class RatTask extends DefaultTask {
217217
pattern(substring: "Produced by GNUPLOT")
218218
// snowball stemmers generated by snowball compiler
219219
pattern(substring: "Generated by Snowball")
220-
// parsers generated by antlr
221-
pattern(substring: "ANTLR GENERATED CODE")
222220
}
223221

224222
approvedLicense(familyName: "Apache")

build-tools/build-infra/src/main/groovy/lucene.validation.source-patterns.gradle

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -147,11 +147,8 @@ class ValidateSourcePatternsTask extends DefaultTask {
147147
violations.add(msg)
148148
}
149149

150-
def javadocsPattern = ~$/(?sm)^\Q/**\E(.*?)\Q*/\E/$;
151-
def javaCommentPattern = ~$/(?sm)^\Q/*\E(.*?)\Q*/\E/$;
152150
def xmlCommentPattern = ~$/(?sm)\Q<!--\E(.*?)\Q-->\E/$;
153151
def lineSplitter = ~$/[\r\n]+/$;
154-
def packagePattern = ~$/(?m)^\s*package\s+org\.apache.*;/$;
155152
def xmlTagPattern = ~$/(?m)\s*<[a-zA-Z].*/$;
156153
def validSPINameJavadocTag = ~$/(?s)\s*\*\s*@lucene\.spi\s+\{@value #NAME\}/$;
157154

@@ -209,14 +206,6 @@ class ValidateSourcePatternsTask extends DefaultTask {
209206
reportViolation(f, String.format(Locale.ROOT, '%s [start=%d, end=%d]', name, matcher.start(), matcher.end()));
210207
}
211208
}
212-
def javadocsMatcher = javadocsPattern.matcher(text);
213-
def ratDocument = new FileDocument(f);
214-
while (javadocsMatcher.find()) {
215-
if (isLicense(javadocsMatcher, ratDocument)) {
216-
reportViolation(f, String.format(Locale.ENGLISH, 'javadoc-style license header [%s]',
217-
ratDocument.getMetaData().value(MetaData.RAT_URL_LICENSE_FAMILY_NAME)));
218-
}
219-
}
220209
if (f.name.endsWith('.java')) {
221210
// make sure that SPI names of all tokenizers/charfilters/tokenfilters are documented
222211
if (!f.name.contains("Test") && !f.name.contains("Mock") && !f.name.contains("Fake") && !text.contains("abstract class") &&
@@ -228,9 +217,9 @@ class ValidateSourcePatternsTask extends DefaultTask {
228217
reportViolation(f, 'invalid spi name documentation')
229218
}
230219
}
231-
checkLicenseHeaderPrecedes(f, 'package', packagePattern, javaCommentPattern, text, ratDocument);
232220
}
233221
if (f.name.endsWith('.xml')) {
222+
def ratDocument = new FileDocument(f);
234223
checkLicenseHeaderPrecedes(f, '<tag>', xmlTagPattern, xmlCommentPattern, text, ratDocument);
235224
}
236225
} catch (e) {

gradle/validation/ast-grep/rules/java-patterns.yml

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,33 @@ rule:
2121
severity: error
2222
message: illegal use of `var` keyword with generic instance creation
2323
note: add explicit typing on the RHS when using `var`
24+
---
25+
# yaml-language-server: $schema=https://raw.githubusercontent.com/ast-grep/ast-grep/refs/heads/main/schemas/java_rule.json
26+
id: javadoc-style-license-header
27+
language: java
28+
rule:
29+
matches: java-license
30+
regex: "^/[*][*]"
31+
pattern: $TEXT
32+
# remove extraneous stars
33+
transform:
34+
NEWTEXT:
35+
replace:
36+
replace: "^/[*]+"
37+
by: "/*"
38+
source: $TEXT
39+
fix: $NEWTEXT
40+
severity: error
41+
message: license should be a non-javadoc block comment
42+
---
43+
# yaml-language-server: $schema=https://raw.githubusercontent.com/ast-grep/ast-grep/refs/heads/main/schemas/java_rule.json
44+
id: license-after-package
45+
language: java
46+
rule:
47+
kind: package_declaration
48+
precedes:
49+
matches: java-license
50+
# allow anything in between (e.g. import statements)
51+
stopBy: end
52+
severity: error
53+
message: license should be before `package` declaration
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
1+
---
2+
# yaml-language-server: $schema=https://raw.githubusercontent.com/ast-grep/ast-grep/refs/heads/main/schemas/project.json
13
ruleDirs:
24
- ./rules
35

46
testConfigs:
57
- testDir: ./tests
8+
9+
utilDirs:
10+
- ./utils

gradle/validation/ast-grep/tests/java-patterns.yml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,31 @@ invalid:
1313
valid:
1414
- HashSet<String> x = new HashSet<>();
1515
- var list = new ArrayList<String>();
16+
---
17+
id: javadoc-style-license-header
18+
invalid:
19+
- |
20+
/** BLAH BLAH Unless required by applicable law BLAH BLAH */
21+
valid:
22+
- |
23+
/* BLAH BLAH Unless required by applicable law BLAH BLAH */
24+
- |
25+
/** something else */
26+
---
27+
id: license-after-package
28+
invalid:
29+
- |
30+
package foo;
31+
/* BLAH BLAH Unless required by applicable law BLAH BLAH */
32+
- |
33+
package foo;
34+
import bar;
35+
/* BLAH BLAH Unless required by applicable law BLAH BLAH */
36+
- |
37+
/* BLAH BLAH Unless required by applicable law BLAH BLAH */
38+
package foo;
39+
/* BLAH BLAH Unless required by applicable law BLAH BLAH */
40+
valid:
41+
- |
42+
/* BLAH BLAH Unless required by applicable law BLAH BLAH */
43+
package foo;
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
---
2+
# yaml-language-server: $schema=https://raw.githubusercontent.com/ast-grep/ast-grep/refs/heads/main/schemas/java_rule.json
3+
id: java-license
4+
language: java
5+
rule:
6+
# detect a license for non-legal purposes (e.g. style)
7+
# make this more sophisticated as needed
8+
kind: block_comment
9+
# must be top-level
10+
inside:
11+
kind: program
12+
any:
13+
- regex: Unless required by applicable law
14+
- regex: FITNESS FOR A PARTICULAR PURPOSE

lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemmer.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
* See the License for the specific language governing permissions and
1515
* limitations under the License.
1616
*/
17-
package org.apache.lucene.analysis.de;
1817

1918
/*
2019
* This algorithm is updated based on code located at:
@@ -51,6 +50,8 @@
5150
* POSSIBILITY OF SUCH DAMAGE.
5251
*/
5352

53+
package org.apache.lucene.analysis.de;
54+
5455
/**
5556
* Light Stemmer for German.
5657
*

lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemmer.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
* See the License for the specific language governing permissions and
1515
* limitations under the License.
1616
*/
17-
package org.apache.lucene.analysis.de;
1817

1918
/*
2019
* This algorithm is updated based on code located at:
@@ -51,6 +50,8 @@
5150
* POSSIBILITY OF SUCH DAMAGE.
5251
*/
5352

53+
package org.apache.lucene.analysis.de;
54+
5455
/**
5556
* Minimal Stemmer for German.
5657
*

lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemmer.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
* See the License for the specific language governing permissions and
1515
* limitations under the License.
1616
*/
17-
package org.apache.lucene.analysis.es;
1817

1918
/*
2019
* This algorithm is updated based on code located at:
@@ -51,6 +50,8 @@
5150
* POSSIBILITY OF SUCH DAMAGE.
5251
*/
5352

53+
package org.apache.lucene.analysis.es;
54+
5455
/**
5556
* Light Stemmer for Spanish
5657
*

0 commit comments

Comments
 (0)