1 /***
2 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3 */
4 package net.sourceforge.pmd.cpd;
5
6 import java.util.ArrayList;
7 import java.util.Collections;
8 import java.util.HashMap;
9 import java.util.HashSet;
10 import java.util.Iterator;
11 import java.util.List;
12 import java.util.Map;
13 import java.util.Set;
14
15 public class MatchCollector {
16
17 private MatchAlgorithm ma;
18 private Map startMap = new HashMap();
19 private Map fileMap = new HashMap();
20
21 public MatchCollector(MatchAlgorithm ma) {
22 this.ma = ma;
23 }
24
25 public void collect(int minimumLength, List marks) {
26
27 for (int i = 0; i < marks.size() - 1; i++) {
28 TokenEntry mark1 = (TokenEntry) marks.get(i);
29 for (int j = i + 1; j < marks.size(); j++) {
30 TokenEntry mark2 = (TokenEntry) marks.get(j);
31 int diff = mark1.getIndex() - mark2.getIndex();
32 if (-diff < minimumLength) {
33 continue;
34 }
35 if (hasPreviousDupe(mark1, mark2)) {
36 continue;
37 }
38 int dupes = countDuplicateTokens(mark1, mark2);
39
40 if (dupes < minimumLength) {
41 continue;
42 }
43
44 if (diff + dupes >= 1) {
45 continue;
46 }
47 determineMatch(mark1, mark2, dupes);
48 }
49 }
50 }
51
52 public List getMatches() {
53 ArrayList matchList = new ArrayList(startMap.values());
54 groupMatches(matchList);
55 return matchList;
56 }
57
58 /***
59 * A greedy algorithm for determining non-overlapping matches
60 */
61 private void determineMatch(TokenEntry mark1, TokenEntry mark2, int dupes) {
62 Match match = new Match(dupes, mark1, mark2);
63 String fileKey = mark1.getTokenSrcID() + mark2.getTokenSrcID();
64 ArrayList pairMatches = (ArrayList) fileMap.get(fileKey);
65 if (pairMatches == null) {
66 pairMatches = new ArrayList();
67 fileMap.put(fileKey, pairMatches);
68 }
69 boolean add = true;
70 for (int k = 0; k < pairMatches.size(); k++) {
71 Match other = (Match) pairMatches.get(k);
72 if (other.getFirstMark().getIndex() + other.getTokenCount() - mark1.getIndex()
73 > 0) {
74 boolean ordered = other.getSecondMark().getIndex() - mark2.getIndex() < 0;
75 if ((ordered && (other.getEndIndex() - mark2.getIndex() > 0))
76 || (!ordered && (match.getEndIndex() - other.getSecondMark().getIndex()) > 0)) {
77 if (other.getTokenCount() >= match.getTokenCount()) {
78 add = false;
79 break;
80 } else {
81 pairMatches.remove(k);
82 startMap.remove(other.getMatchCode());
83 }
84 }
85 }
86 }
87 if (add) {
88 pairMatches.add(match);
89 startMap.put(match.getMatchCode(), match);
90 }
91 }
92
93 private void groupMatches(ArrayList matchList) {
94 Collections.sort(matchList);
95 HashSet matchSet = new HashSet();
96 Match.MatchCode matchCode = new Match.MatchCode();
97 for (int i = matchList.size(); i > 1; i--) {
98 Match match1 = (Match) matchList.get(i - 1);
99 TokenEntry mark1 = (TokenEntry) match1.getMarkSet().iterator().next();
100 matchSet.clear();
101 matchSet.add(match1.getMatchCode());
102 for (int j = i - 1; j > 0; j--) {
103 Match match2 = (Match) matchList.get(j - 1);
104 if (match1.getTokenCount() != match2.getTokenCount()) {
105 break;
106 }
107 TokenEntry mark2 = null;
108 for (Iterator iter = match2.getMarkSet().iterator(); iter.hasNext();) {
109 mark2 = (TokenEntry) iter.next();
110 if (mark2 != mark1) {
111 break;
112 }
113 }
114 int dupes = countDuplicateTokens(mark1, mark2);
115 if (dupes < match1.getTokenCount()) {
116 break;
117 }
118 matchSet.add(match2.getMatchCode());
119 match1.getMarkSet().addAll(match2.getMarkSet());
120 matchList.remove(i - 2);
121 i--;
122 }
123 if (matchSet.size() == 1) {
124 continue;
125 }
126
127 Set pruned = match1.getMarkSet();
128 boolean done = false;
129 ArrayList a1 = new ArrayList(match1.getMarkSet());
130 Collections.sort(a1);
131 for (int outer = 0; outer < a1.size() - 1 && !done; outer++) {
132 TokenEntry cmark1 = (TokenEntry) a1.get(outer);
133 for (int inner = outer + 1; inner < a1.size() && !done; inner++) {
134 TokenEntry cmark2 = (TokenEntry) a1.get(inner);
135 matchCode.setFirst(cmark1.getIndex());
136 matchCode.setSecond(cmark2.getIndex());
137 if (!matchSet.contains(matchCode)) {
138 if (pruned.size() > 2) {
139 pruned.remove(cmark2);
140 }
141 if (pruned.size() == 2) {
142 done = true;
143 }
144 }
145 }
146 }
147 }
148 }
149
150 private boolean hasPreviousDupe(TokenEntry mark1, TokenEntry mark2) {
151 if (mark1.getIndex() == 0) {
152 return false;
153 }
154 return !matchEnded(ma.tokenAt(-1, mark1), ma.tokenAt(-1, mark2));
155 }
156
157 private int countDuplicateTokens(TokenEntry mark1, TokenEntry mark2) {
158 int index = 0;
159 while (!matchEnded(ma.tokenAt(index, mark1), ma.tokenAt(index, mark2))) {
160 index++;
161 }
162 return index;
163 }
164
165 private boolean matchEnded(TokenEntry token1, TokenEntry token2) {
166 return token1.getIdentifier() != token2.getIdentifier() || token1 == TokenEntry.EOF || token2 == TokenEntry.EOF;
167 }
168 }