1
2
3
4 package net.sourceforge.pmd.cpd;
5
6 import java.io.StringReader;
7 import java.util.Properties;
8
9 import net.sourceforge.pmd.lang.LanguageVersion;
10 import net.sourceforge.pmd.lang.LanguageVersionHandler;
11 import net.sourceforge.pmd.lang.TokenManager;
12 import net.sourceforge.pmd.lang.java.ast.JavaParserConstants;
13 import net.sourceforge.pmd.lang.java.ast.Token;
14
15 public class JavaTokenizer implements Tokenizer {
16
17 public static final String IGNORE_LITERALS = "ignore_literals";
18 public static final String IGNORE_IDENTIFIERS = "ignore_identifiers";
19 public static final String IGNORE_ANNOTATIONS = "ignore_annotations";
20 public static final String CPD_START = "\"CPD-START\"";
21 public static final String CPD_END = "\"CPD-END\"";
22
23 private boolean ignoreAnnotations;
24 private boolean ignoreLiterals;
25 private boolean ignoreIdentifiers;
26
27 public void setProperties(Properties properties) {
28 ignoreAnnotations = Boolean.parseBoolean(properties.getProperty(IGNORE_ANNOTATIONS, "false"));
29 ignoreLiterals = Boolean.parseBoolean(properties.getProperty(IGNORE_LITERALS, "false"));
30 ignoreIdentifiers = Boolean.parseBoolean(properties.getProperty(IGNORE_IDENTIFIERS, "false"));
31 }
32
33 public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
34 StringBuilder stringBuilder = sourceCode.getCodeBuffer();
35
36
37 LanguageVersionHandler languageVersionHandler = LanguageVersion.JAVA_14.getLanguageVersionHandler();
38 String fileName = sourceCode.getFileName();
39 TokenManager tokenMgr = languageVersionHandler.getParser(languageVersionHandler.getDefaultParserOptions()).getTokenManager(
40 fileName, new StringReader(stringBuilder.toString()));
41 Token currentToken = (Token) tokenMgr.getNextToken();
42
43 TokenDiscarder discarder = new TokenDiscarder(ignoreAnnotations);
44
45 while (currentToken.image.length() > 0) {
46 discarder.updateState(currentToken);
47
48 if (discarder.isDiscarding()) {
49 currentToken = (Token) tokenMgr.getNextToken();
50 continue;
51 }
52
53 processToken(tokenEntries, fileName, currentToken);
54 currentToken = (Token) tokenMgr.getNextToken();
55 }
56 tokenEntries.add(TokenEntry.getEOF());
57 }
58
59 private void processToken(Tokens tokenEntries, String fileName, Token currentToken) {
60 String image = currentToken.image;
61 if (ignoreLiterals
62 && (currentToken.kind == JavaParserConstants.STRING_LITERAL
63 || currentToken.kind == JavaParserConstants.CHARACTER_LITERAL
64 || currentToken.kind == JavaParserConstants.DECIMAL_LITERAL
65 || currentToken.kind == JavaParserConstants.FLOATING_POINT_LITERAL)) {
66 image = String.valueOf(currentToken.kind);
67 }
68 if (ignoreIdentifiers && currentToken.kind == JavaParserConstants.IDENTIFIER) {
69 image = String.valueOf(currentToken.kind);
70 }
71 tokenEntries.add(new TokenEntry(image, fileName, currentToken.beginLine));
72 }
73
74 public void setIgnoreLiterals(boolean ignore) {
75 this.ignoreLiterals = ignore;
76 }
77
78 public void setIgnoreIdentifiers(boolean ignore) {
79 this.ignoreIdentifiers = ignore;
80 }
81
82 public void setIgnoreAnnotations(boolean ignoreAnnotations) {
83 this.ignoreAnnotations = ignoreAnnotations;
84 }
85
86
87
88
89
90
91
92
93
94
95 private static class TokenDiscarder {
96 private boolean isAnnotation = false;
97 private boolean nextTokenEndsAnnotation = false;
98 private int annotationStack = 0;
99
100 private boolean discardingSemicolon = false;
101 private boolean discardingKeywords = false;
102 private boolean discardingSuppressing = false;
103 private boolean discardingAnnotations = false;
104 private boolean ignoreAnnotations = false;
105
106 public TokenDiscarder(boolean ignoreAnnotations) {
107 this.ignoreAnnotations = ignoreAnnotations;
108 }
109
110 public void updateState(Token currentToken) {
111 detectAnnotations(currentToken);
112
113 skipSemicolon(currentToken);
114 skipPackageAndImport(currentToken);
115 skipCPDSuppression(currentToken);
116 if (ignoreAnnotations) {
117 skipAnnotations();
118 }
119 }
120
121 public void skipPackageAndImport(Token currentToken) {
122 if (currentToken.kind == JavaParserConstants.PACKAGE || currentToken.kind == JavaParserConstants.IMPORT) {
123 discardingKeywords = true;
124 } else if (discardingKeywords && currentToken.kind == JavaParserConstants.SEMICOLON) {
125 discardingKeywords = false;
126 }
127 }
128
129 public void skipSemicolon(Token currentToken) {
130 if (currentToken.kind == JavaParserConstants.SEMICOLON) {
131 discardingSemicolon = true;
132 } else if (discardingSemicolon && currentToken.kind != JavaParserConstants.SEMICOLON) {
133 discardingSemicolon = false;
134 }
135 }
136
137 public void skipCPDSuppression(Token currentToken) {
138
139 if (isAnnotation) {
140 if (!discardingSuppressing && currentToken.kind == JavaParserConstants.STRING_LITERAL && CPD_START.equals(currentToken.image)) {
141 discardingSuppressing = true;
142 } else if (discardingSuppressing && currentToken.kind == JavaParserConstants.STRING_LITERAL && CPD_END.equals(currentToken.image)) {
143 discardingSuppressing = false;
144 }
145 }
146 }
147
148 public void skipAnnotations() {
149 if (!discardingAnnotations && isAnnotation) {
150 discardingAnnotations = true;
151 } else if (discardingAnnotations && !isAnnotation) {
152 discardingAnnotations = false;
153 }
154 }
155
156 public boolean isDiscarding() {
157 boolean result = discardingSemicolon || discardingKeywords || discardingAnnotations || discardingSuppressing;
158 return result;
159 }
160
161 public void detectAnnotations(Token currentToken) {
162 if (isAnnotation && nextTokenEndsAnnotation) {
163 isAnnotation = false;
164 nextTokenEndsAnnotation = false;
165 }
166 if (isAnnotation) {
167 if (currentToken.kind == JavaParserConstants.LPAREN) {
168 annotationStack++;
169 } else if (currentToken.kind == JavaParserConstants.RPAREN) {
170 annotationStack--;
171 if (annotationStack == 0) {
172 nextTokenEndsAnnotation = true;
173 }
174 } else if (annotationStack == 0 && currentToken.kind != JavaParserConstants.IDENTIFIER && currentToken.kind != JavaParserConstants.LPAREN) {
175 isAnnotation = false;
176 }
177 }
178 if (currentToken.kind == JavaParserConstants.AT) {
179 isAnnotation = true;
180 }
181 }
182 }
183 }