View Javadoc

1   /**
2    * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3    */
4   package net.sourceforge.pmd.cpd;
5   
6   import java.io.File;
7   import java.io.FileNotFoundException;
8   import java.io.IOException;
9   import java.util.HashSet;
10  import java.util.Iterator;
11  import java.util.List;
12  import java.util.Map;
13  import java.util.Properties;
14  import java.util.Set;
15  import java.util.TreeMap;
16  
17  import net.sourceforge.pmd.util.FileFinder;
18  
19  import org.apache.commons.io.FilenameUtils;
20  
21  public class CPD {
22  
23      private static final int MISSING_FILES = 1;
24  	private static final int MISSING_ARGS = 2;
25  	private static final int DUPLICATE_CODE_FOUND = 4;
26  
27  	private CPDConfiguration configuration;
28  	
29  	private Map<String, SourceCode> source = new TreeMap<String, SourceCode>();
30      private CPDListener listener = new CPDNullListener();
31      private Tokens tokens = new Tokens();
32      private MatchAlgorithm matchAlgorithm;
33  
34      public CPD(CPDConfiguration theConfiguration) {
35      	configuration = theConfiguration;
36          // before we start any tokenizing (add(File...)), we need to reset the static TokenEntry status
37          TokenEntry.clearImages();
38      }
39  
40      public void setCpdListener(CPDListener cpdListener) {
41          this.listener = cpdListener;
42      }
43  
44      public void go() {
45          matchAlgorithm = new MatchAlgorithm(
46          		source, tokens, 
47          		configuration.minimumTileSize(), 
48          		listener
49          		);
50          matchAlgorithm.findMatches();
51      }
52  
53      public Iterator<Match> getMatches() {
54          return matchAlgorithm.matches();
55      }
56  
57      public void add(File file) throws IOException {
58          add(1, file);
59      }
60  
61      public void addAllInDirectory(String dir) throws IOException {
62          addDirectory(dir, false);
63      }
64  
65      public void addRecursively(String dir) throws IOException {
66          addDirectory(dir, true);
67      }
68  
69      public void add(List<File> files) throws IOException {
70          for (File f: files) {
71              add(files.size(), f);
72          }
73      }
74  
75      private void addDirectory(String dir, boolean recurse) throws IOException {
76          if (!(new File(dir)).exists()) {
77              throw new FileNotFoundException("Couldn't find directory " + dir);
78          }
79          FileFinder finder = new FileFinder();
80          // TODO - could use SourceFileSelector here
81          add(finder.findFilesFrom(dir, configuration.filenameFilter(), recurse));
82      }
83  
84      private Set<String> current = new HashSet<String>();
85  
86      private void add(int fileCount, File file) throws IOException {
87  
88          if (configuration.skipDuplicates()) {
89              // TODO refactor this thing into a separate class
90              String signature = file.getName() + '_' + file.length();
91              if (current.contains(signature)) {
92                  System.err.println("Skipping " + file.getAbsolutePath() + " since it appears to be a duplicate file and --skip-duplicate-files is set");
93                  return;
94              }
95              current.add(signature);
96          }
97  
98          if (!FilenameUtils.equalsNormalizedOnSystem(file.getAbsoluteFile().getCanonicalPath(), file.getAbsolutePath())) {
99              System.err.println("Skipping " + file + " since it appears to be a symlink");
100             return;
101         }
102 
103         if (!file.exists()) {
104             System.err.println("Skipping " + file + " since it doesn't exist (broken symlink?)");
105             return;
106         }
107 
108         listener.addedFile(fileCount, file);
109         SourceCode sourceCode = configuration.sourceCodeFor(file);
110         configuration.tokenizer().tokenize(sourceCode, tokens);
111         source.put(sourceCode.getFileName(), sourceCode);
112     }
113 
114     private static void setSystemProperties(String[] args) {
115         boolean ignoreLiterals = CPDConfiguration.findBooleanSwitch(args, "--ignore-literals");
116         boolean ignoreIdentifiers = CPDConfiguration.findBooleanSwitch(args, "--ignore-identifiers");
117         boolean ignoreAnnotations = CPDConfiguration.findBooleanSwitch(args, "--ignore-annotations");
118         Properties properties = System.getProperties();
119         if (ignoreLiterals) {
120             properties.setProperty(JavaTokenizer.IGNORE_LITERALS, "true");
121         }
122         if (ignoreIdentifiers) {
123             properties.setProperty(JavaTokenizer.IGNORE_IDENTIFIERS, "true");
124         }
125         if (ignoreAnnotations) {
126             properties.setProperty(JavaTokenizer.IGNORE_ANNOTATIONS, "true");
127         }
128         System.setProperties(properties);
129     }
130 
131     public static void main(String[] args) {
132         if (args.length == 0) {
133         	showUsage();
134             System.exit(MISSING_ARGS);
135         }
136 
137         try {
138         	CPDConfiguration config = new CPDConfiguration(args);
139 
140             // Pass extra parameters as System properties to allow language
141             // implementation to retrieve their associate values...
142             setSystemProperties(args);
143            
144             CPD cpd = new CPD(config);
145             
146             /* FIXME: Improve this !!!	*/
147             boolean missingFiles = true;
148             for (int position = 0; position < args.length; position++) {
149                 if (args[position].equals("--files")) {
150                 	cpd.addRecursively(args[position + 1]);
151                 	if ( missingFiles ) {
152                         missingFiles = false;
153                     }
154                 }
155             }
156 
157             if ( missingFiles ) {
158 	            System.out.println("No " + "--files" + " value passed in");
159 	            showUsage();
160 	            System.exit(MISSING_FILES);
161             }
162 
163             cpd.go();
164             if (cpd.getMatches().hasNext()) {
165                 System.out.println(config.renderer().render(cpd.getMatches()));
166                 System.exit(DUPLICATE_CODE_FOUND);
167             }
168         } catch (Exception e) {
169             e.printStackTrace();
170         }
171     }
172 
173     public static void showUsage() {
174         System.out.println("Usage:");
175         System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens xxx --files xxx [--language xxx] [--encoding xxx] [--format (xml|text|csv|vs)] [--skip-duplicate-files] ");
176         System.out.println("i.e: ");
177         System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --files c:\\jdk14\\src\\java ");
178         System.out.println("or: ");
179         System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --files /path/to/c/code --language c ");
180         System.out.println("or: ");
181         System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --encoding UTF-16LE --files /path/to/java/code --format xml");
182     }
183 
184 }