1
2
3
4 package net.sourceforge.pmd.cpd;
5
6 import java.io.File;
7 import java.io.FileNotFoundException;
8 import java.io.IOException;
9 import java.util.HashSet;
10 import java.util.Iterator;
11 import java.util.List;
12 import java.util.Map;
13 import java.util.Properties;
14 import java.util.Set;
15 import java.util.TreeMap;
16
17 import net.sourceforge.pmd.util.FileFinder;
18
19 import org.apache.commons.io.FilenameUtils;
20
21 public class CPD {
22
23 private static final int MISSING_FILES = 1;
24 private static final int MISSING_ARGS = 2;
25 private static final int DUPLICATE_CODE_FOUND = 4;
26
27 private CPDConfiguration configuration;
28
29 private Map<String, SourceCode> source = new TreeMap<String, SourceCode>();
30 private CPDListener listener = new CPDNullListener();
31 private Tokens tokens = new Tokens();
32 private MatchAlgorithm matchAlgorithm;
33
34 public CPD(CPDConfiguration theConfiguration) {
35 configuration = theConfiguration;
36
37 TokenEntry.clearImages();
38 }
39
40 public void setCpdListener(CPDListener cpdListener) {
41 this.listener = cpdListener;
42 }
43
44 public void go() {
45 matchAlgorithm = new MatchAlgorithm(
46 source, tokens,
47 configuration.minimumTileSize(),
48 listener
49 );
50 matchAlgorithm.findMatches();
51 }
52
53 public Iterator<Match> getMatches() {
54 return matchAlgorithm.matches();
55 }
56
57 public void add(File file) throws IOException {
58 add(1, file);
59 }
60
61 public void addAllInDirectory(String dir) throws IOException {
62 addDirectory(dir, false);
63 }
64
65 public void addRecursively(String dir) throws IOException {
66 addDirectory(dir, true);
67 }
68
69 public void add(List<File> files) throws IOException {
70 for (File f: files) {
71 add(files.size(), f);
72 }
73 }
74
75 private void addDirectory(String dir, boolean recurse) throws IOException {
76 if (!(new File(dir)).exists()) {
77 throw new FileNotFoundException("Couldn't find directory " + dir);
78 }
79 FileFinder finder = new FileFinder();
80
81 add(finder.findFilesFrom(dir, configuration.filenameFilter(), recurse));
82 }
83
84 private Set<String> current = new HashSet<String>();
85
86 private void add(int fileCount, File file) throws IOException {
87
88 if (configuration.skipDuplicates()) {
89
90 String signature = file.getName() + '_' + file.length();
91 if (current.contains(signature)) {
92 System.err.println("Skipping " + file.getAbsolutePath() + " since it appears to be a duplicate file and --skip-duplicate-files is set");
93 return;
94 }
95 current.add(signature);
96 }
97
98 if (!FilenameUtils.equalsNormalizedOnSystem(file.getAbsoluteFile().getCanonicalPath(), file.getAbsolutePath())) {
99 System.err.println("Skipping " + file + " since it appears to be a symlink");
100 return;
101 }
102
103 if (!file.exists()) {
104 System.err.println("Skipping " + file + " since it doesn't exist (broken symlink?)");
105 return;
106 }
107
108 listener.addedFile(fileCount, file);
109 SourceCode sourceCode = configuration.sourceCodeFor(file);
110 configuration.tokenizer().tokenize(sourceCode, tokens);
111 source.put(sourceCode.getFileName(), sourceCode);
112 }
113
114 private static void setSystemProperties(String[] args) {
115 boolean ignoreLiterals = CPDConfiguration.findBooleanSwitch(args, "--ignore-literals");
116 boolean ignoreIdentifiers = CPDConfiguration.findBooleanSwitch(args, "--ignore-identifiers");
117 boolean ignoreAnnotations = CPDConfiguration.findBooleanSwitch(args, "--ignore-annotations");
118 Properties properties = System.getProperties();
119 if (ignoreLiterals) {
120 properties.setProperty(JavaTokenizer.IGNORE_LITERALS, "true");
121 }
122 if (ignoreIdentifiers) {
123 properties.setProperty(JavaTokenizer.IGNORE_IDENTIFIERS, "true");
124 }
125 if (ignoreAnnotations) {
126 properties.setProperty(JavaTokenizer.IGNORE_ANNOTATIONS, "true");
127 }
128 System.setProperties(properties);
129 }
130
131 public static void main(String[] args) {
132 if (args.length == 0) {
133 showUsage();
134 System.exit(MISSING_ARGS);
135 }
136
137 try {
138 CPDConfiguration config = new CPDConfiguration(args);
139
140
141
142 setSystemProperties(args);
143
144 CPD cpd = new CPD(config);
145
146
147 boolean missingFiles = true;
148 for (int position = 0; position < args.length; position++) {
149 if (args[position].equals("--files")) {
150 cpd.addRecursively(args[position + 1]);
151 if ( missingFiles ) {
152 missingFiles = false;
153 }
154 }
155 }
156
157 if ( missingFiles ) {
158 System.out.println("No " + "--files" + " value passed in");
159 showUsage();
160 System.exit(MISSING_FILES);
161 }
162
163 cpd.go();
164 if (cpd.getMatches().hasNext()) {
165 System.out.println(config.renderer().render(cpd.getMatches()));
166 System.exit(DUPLICATE_CODE_FOUND);
167 }
168 } catch (Exception e) {
169 e.printStackTrace();
170 }
171 }
172
173 public static void showUsage() {
174 System.out.println("Usage:");
175 System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens xxx --files xxx [--language xxx] [--encoding xxx] [--format (xml|text|csv|vs)] [--skip-duplicate-files] ");
176 System.out.println("i.e: ");
177 System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --files c:\\jdk14\\src\\java ");
178 System.out.println("or: ");
179 System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --files /path/to/c/code --language c ");
180 System.out.println("or: ");
181 System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --encoding UTF-16LE --files /path/to/java/code --format xml");
182 }
183
184 }