1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package net.sf.statsvn.input;
24
25 import java.io.IOException;
26 import java.util.ArrayList;
27 import java.util.Date;
28 import java.util.Iterator;
29 import java.util.List;
30 import java.util.ListIterator;
31 import java.util.Map;
32 import java.util.SortedSet;
33 import java.util.TreeSet;
34
35 import net.sf.statcvs.input.NoLineCountException;
36 import net.sf.statcvs.model.Revision;
37 import net.sf.statcvs.model.SymbolicName;
38 import net.sf.statcvs.model.VersionedFile;
39 import net.sf.statsvn.output.SvnConfigurationOptions;
40
41 /**
42 * <p>
43 * Builds a {@link VersionedFile} with {@link Revision}s from logging data.
44 * This class is responsible for deciding if a file or revisions will be
45 * included in the report, for translating from CVS logfile data structures to
46 * the data structures in the <tt>net.sf.statsvn.model</tt> package, and for
47 * calculating the LOC history for the file.
48 * </p>
49 *
50 * <p>
51 * A main goal of this class is to delay the creation of the
52 * <tt>VersionedFile</tt> object until all revisions of the file have been
53 * collected from the log. We could simply create <tt>VersionedFile</tt> and
54 * <tt>Revision</tt>s on the fly as we parse through the log, but this
55 * creates a problem if we decide not to include the file after reading several
56 * revisions. The creation of a <tt>VersionedFile</tt> or <tt>Revision</tt>
57 * can cause many more objects to be created (<tt>Author</tt>,
58 * <tt>Directory</tt>, <tt>Commit</tt>), and it would be very hard to get
59 * rid of them if we don't want the file. This problem is solved by first
60 * collecting all information about one file in this class, and then, with all
61 * information present, deciding if we want to create the model instances or
62 * not.
63 * </p>
64 *
65 * @author Richard Cyganiak <richard@cyganiak.de>
66 * @author Tammo van Lessen
67 * @author Jason Kealey <jkealey@shade.ca>
68 * @author Gunter Mussbacher <gunterm@site.uottawa.ca>
69 * @version $Id: FileBuilder.java 351 2008-03-28 18:46:26Z benoitx $
70 */
71 public class FileBuilder {
72 private static final int ONE_SECOND = 1000;
73
74 private static final int ONE_MIN_IN_MS = 60000;
75
76 private final Builder builder;
77
78 private final String name;
79
80 private boolean binary;
81
82 private final List revisions = new ArrayList();
83
84 private final Map revBySymnames;
85
86 private final Map dateBySymnames;
87
88 private int locDelta;
89
90 /**
91 * Creates a new <tt>FileBuilder</tt>.
92 *
93 * @param builder
94 * a <tt>Builder</tt> that provides factory services for author
95 * and directory instances and line counts.
96 * @param name
97 * the filename
98 * @param binary
99 * Is this a binary file or not?
100 */
101 public FileBuilder(final Builder builder, final String name, final boolean isBinary, final Map revBySymnames, final Map dateBySymnames) {
102 this.builder = builder;
103 this.name = name;
104 this.binary = isBinary;
105 this.revBySymnames = revBySymnames;
106 this.dateBySymnames = dateBySymnames;
107
108 SvnConfigurationOptions.getTaskLogger().log("logging " + name);
109 }
110
111 /**
112 * Adds a revision to the file. The revisions must be added in the same
113 * order as they appear in the CVS logfile, that is, most recent first.
114 *
115 * @param data
116 * the revision
117 */
118 public void addRevisionData(final RevisionData data) {
119 if (binary && !data.isCreationOrRestore()) {
120 data.setLines(0, 0);
121 }
122 this.revisions.add(data);
123
124 locDelta += getLOCChange(data);
125 }
126
127 /**
128 * Creates and returns a {@link VersionedFile} representation of the file.
129 * <tt>null</tt> is returned if the file does not meet certain criteria,
130 * for example if its filename meets an exclude filter or if it was dead
131 * during the entire logging timespan.
132 *
133 * @param beginOfLogDate
134 * the date of the begin of the log
135 * @return a <tt>VersionedFile</tt> representation of the file.
136 */
137 public VersionedFile createFile(final Date beginOfLogDate) {
138 if (isFilteredFile() || !fileExistsInLogPeriod()) {
139 return null;
140 }
141
142 final VersionedFile file = new VersionedFile(name, builder.getDirectory(name));
143
144 if (revisions.isEmpty()) {
145 buildBeginOfLogRevision(file, beginOfLogDate, getFinalLOC(), null);
146 return file;
147 }
148
149 final Iterator it = revisions.iterator();
150 RevisionData currentData = (RevisionData) it.next();
151 int currentLOC = getFinalLOC();
152 RevisionData previousData;
153 int previousLOC;
154 SortedSet symbolicNames;
155
156 while (it.hasNext()) {
157 previousData = currentData;
158 previousLOC = currentLOC;
159 currentData = (RevisionData) it.next();
160 currentLOC = previousLOC - getLOCChange(previousData);
161
162
163 symbolicNames = createSymbolicNamesCollection(previousData);
164
165 if (previousData.isCreationOrRestore() || previousData.isChange() || isBinary()) {
166 if (currentData.isDeletion()) {
167 buildCreationRevision(file, previousData, previousLOC, symbolicNames);
168 } else {
169 buildChangeRevision(file, previousData, previousLOC, symbolicNames);
170 }
171 } else if (previousData.isDeletion()) {
172 buildDeletionRevision(file, previousData, previousLOC, symbolicNames);
173 } else {
174 SvnConfigurationOptions.getTaskLogger().info("illegal state in " + file.getFilenameWithPath() + ":" + previousData.getRevisionNumber());
175 }
176 }
177
178
179 symbolicNames = createSymbolicNamesCollection(currentData);
180
181 final int nextLinesOfCode = currentLOC - getLOCChange(currentData);
182 if (currentData.isCreationOrRestore()) {
183 buildCreationRevision(file, currentData, currentLOC, symbolicNames);
184 } else if (currentData.isDeletion()) {
185 buildDeletionRevision(file, currentData, currentLOC, symbolicNames);
186 buildBeginOfLogRevision(file, beginOfLogDate, nextLinesOfCode, symbolicNames);
187 } else if (currentData.isChange()) {
188 buildChangeRevision(file, currentData, currentLOC, symbolicNames);
189 currentData.setDate(new Date(currentData.getDate().getTime() - ONE_SECOND));
190 buildCreationRevision(file, currentData, 0, symbolicNames);
191 buildBeginOfLogRevision(file, beginOfLogDate, nextLinesOfCode, symbolicNames);
192 } else {
193 SvnConfigurationOptions.getTaskLogger().info("illegal state in " + file.getFilenameWithPath() + ":" + currentData.getRevisionNumber());
194 }
195 return file;
196 }
197
198 /**
199 * Gets a LOC count for the file's most recent revision. If the file exists
200 * in the local checkout, we ask the {@link RepositoryFileManager} to count
201 * its lines of code. If not (that is, it is dead), return an approximated
202 * LOC value for its last non-dead revision.
203 *
204 * @return the LOC count for the file's most recent revision.
205 */
206 private int getFinalLOC() {
207 if (binary) {
208 return 0;
209 }
210
211 String revision = null;
212 try {
213 revision = builder.getRevision(name);
214 } catch (final IOException e) {
215 if (!finalRevisionIsDead()) {
216 SvnConfigurationOptions.getTaskLogger().info(e.getMessage());
217 }
218 }
219
220 try {
221
222
223
224 if (!revisions.isEmpty()) {
225 final RevisionData firstAdded = (RevisionData) revisions.get(0);
226 if (!finalRevisionIsDead() && !firstAdded.getRevisionNumber().equals(revision)) {
227 SvnConfigurationOptions.getTaskLogger().info("Revision of " + name + " does not match expected revision");
228 }
229 }
230 return builder.getLOC(name);
231
232 } catch (final NoLineCountException e) {
233 if (!finalRevisionIsDead()) {
234 SvnConfigurationOptions.getTaskLogger().info(e.getMessage());
235 }
236 return approximateFinalLOC();
237 }
238 }
239
240 /**
241 * Returns <tt>true</tt> if the file's most recent revision is dead.
242 *
243 * @return <tt>true</tt> if the file is dead.
244 */
245 protected boolean finalRevisionIsDead() {
246 if (revisions.isEmpty()) {
247 return false;
248 }
249 return ((RevisionData) revisions.get(0)).isDeletion();
250 }
251
252 /**
253 * Returns <tt>true</tt> if the file has revisions.
254 *
255 * @return Returns <tt>true</tt> if the file has revisions.
256 */
257 public boolean existRevision() {
258 return !revisions.isEmpty();
259 }
260
261 /**
262 * Approximates the LOC count for files that are not present in the local
263 * checkout. If a file was deleted at some point in history, then we can't
264 * count its final lines of code. This algorithm calculates a lower bound
265 * for the file's LOC prior to deletion by following the ups and downs of
266 * the revisions.
267 *
268 * @return a lower bound for the file's LOC before it was deleted
269 */
270 private int approximateFinalLOC() {
271 int max = 0;
272 int current = 0;
273 final Iterator it = revisions.iterator();
274 while (it.hasNext()) {
275 final RevisionData data = (RevisionData) it.next();
276 current += data.getLinesAdded();
277 max = Math.max(current, max);
278 current -= data.getLinesRemoved();
279 }
280 return max;
281 }
282
283 /**
284 * Returns the change in LOC count caused by a revision. If there were 10
285 * lines added and 3 lines removed, 7 would be returned. This does not take
286 * into account file deletion and creation.
287 *
288 * @param data
289 * a revision
290 * @return the change in LOC count
291 */
292 private int getLOCChange(final RevisionData data) {
293 return data.getLinesAdded() - data.getLinesRemoved();
294 }
295
296 private void buildCreationRevision(final VersionedFile file, final RevisionData data, final int loc, final SortedSet symbolicNames) {
297 file.addInitialRevision(data.getRevisionNumber(), builder.getAuthor(data.getLoginName()), data.getDate(), data.getComment(), loc, symbolicNames);
298 }
299
300 private void buildChangeRevision(final VersionedFile file, final RevisionData data, final int loc, final SortedSet symbolicNames) {
301 file.addChangeRevision(data.getRevisionNumber(), builder.getAuthor(data.getLoginName()), data.getDate(), data.getComment(), loc, data.getLinesAdded()
302 - data.getLinesRemoved(), Math.min(data.getLinesAdded(), data.getLinesRemoved()), symbolicNames);
303 }
304
305 private void buildDeletionRevision(final VersionedFile file, final RevisionData data, final int loc, final SortedSet symbolicNames) {
306 file.addDeletionRevision(data.getRevisionNumber(), builder.getAuthor(data.getLoginName()), data.getDate(), data.getComment(), loc, symbolicNames);
307 }
308
309 private void buildBeginOfLogRevision(final VersionedFile file, final Date beginOfLogDate, final int loc, final SortedSet symbolicNames) {
310 final Date date = new Date(beginOfLogDate.getTime() - ONE_MIN_IN_MS);
311 final Revision dummyForMove = file.addBeginOfLogRevision(date, loc, symbolicNames);
312
313
314
315
316 if (symbolicNames != null) {
317 final Iterator it = symbolicNames.iterator();
318 while (it.hasNext()) {
319 ((SymbolicName) it.next()).getRevisions().remove(dummyForMove);
320 }
321 }
322 }
323
324 /**
325 * Takes a filename and checks if it should be processed or not. Can be used
326 * to filter out unwanted files.
327 *
328 * @return <tt>true</tt> if this file should not be processed
329 */
330 private boolean isFilteredFile() {
331 return !this.builder.matchesPatterns(this.name);
332 }
333
334 /**
335 * Returns <tt>false</tt> if the file did never exist in the timespan
336 * covered by the log. For our purposes, a file is non-existant if it has no
337 * revisions and does not exists in the module checkout. Note: A file with
338 * no revisions must be included in the report if it does exist in the
339 * module checkout. This happens if it was created before the log started,
340 * and not changed before the log ended.
341 *
342 * @return <tt>true</tt> if the file did exist at some point in the log
343 * period.
344 */
345 private boolean fileExistsInLogPeriod() {
346 if (revisions.size() > 0 || binary) {
347 return true;
348 }
349 try {
350 builder.getLOC(name);
351 return true;
352 } catch (final NoLineCountException fileDoesNotExistInTimespan) {
353 return false;
354 }
355 }
356
357 /**
358 * Creates a sorted set containing all symbolic name objects affected by
359 * this revision. If this revision has no symbolic names, this method
360 * returns null.
361 *
362 * @param revisionData
363 * this revision
364 * @return the sorted set or null
365 */
366 private SortedSet createSymbolicNamesCollection(final RevisionData revisionData) {
367 SortedSet symbolicNames = null;
368
369 final int currentRevision = getRevisionAsInt(revisionData.getRevisionNumber());
370 SvnConfigurationOptions.getTaskLogger().log("\n" + name + " CURRENT REVISION = " + currentRevision + " Deleted " + revisionData.isDeletion());
371
372 if (revisions.isEmpty()) {
373 SvnConfigurationOptions.getTaskLogger().log("NO Revisions....");
374 return symbolicNames;
375 }
376
377
378 for (final Iterator tags = revBySymnames.entrySet().iterator(); tags.hasNext();) {
379 final Map.Entry tag = (Map.Entry) tags.next();
380
381 final int tagRevision = getRevisionAsInt((String) tag.getValue());
382
383 SvnConfigurationOptions.getTaskLogger().log("Considering tag REV " + tagRevision + " name=" + tag.getKey());
384
385
386
387 int previousRevisionForThisFile = getRevisionAsInt(((RevisionData) revisions.get(revisions.size() - 1)).getRevisionNumber());
388 int revisionToTag = -1;
389 for (final ListIterator it = revisions.listIterator(revisions.size()); it.hasPrevious();) {
390 final RevisionData data = (RevisionData) it.previous();
391
392 SvnConfigurationOptions.getTaskLogger().log(
393 "File REV " + data.getRevisionNumber() + " =>" + data.getDate() + " vs " + tagRevision + " Deletion:" + data.isDeletion());
394
395 final int dataRev = getRevisionAsInt(data.getRevisionNumber());
396
397 if (revisionData.isDeletion() && currentRevision < dataRev) {
398
399
400
401 previousRevisionForThisFile = getRevisionAsInt(data.getRevisionNumber());
402 continue;
403 } else if (dataRev == tagRevision) {
404 revisionToTag = tagRevision;
405 break;
406 } else if (dataRev > tagRevision && tagRevision >= previousRevisionForThisFile) {
407 revisionToTag = previousRevisionForThisFile;
408 SvnConfigurationOptions.getTaskLogger().log("1/ Revision to TAG " + revisionToTag);
409 break;
410 }
411
412 previousRevisionForThisFile = getRevisionAsInt(data.getRevisionNumber());
413 }
414
415
416
417 if (previousRevisionForThisFile < tagRevision && !revisionData.isDeletion()) {
418 revisionToTag = previousRevisionForThisFile;
419 SvnConfigurationOptions.getTaskLogger().log("2/ Revision to TAG " + revisionToTag);
420 }
421
422 SvnConfigurationOptions.getTaskLogger().log("Revision to TAG " + revisionToTag);
423
424 if (revisionToTag > 0 && revisionToTag == currentRevision) {
425
426 if (symbolicNames == null) {
427 symbolicNames = new TreeSet();
428 }
429 SvnConfigurationOptions.getTaskLogger().log(
430 "adding revision " + name + "," + currentRevision + " to symname " + tag.getKey() + " Date:" + dateBySymnames.get(tag.getKey()) + " A:"
431 + revisionData.getLinesAdded() + " R:" + revisionData.getLinesRemoved());
432 symbolicNames.add(builder.getSymbolicName((String) tag.getKey(), (Date) dateBySymnames.get(tag.getKey())));
433 }
434 }
435
436 return symbolicNames;
437 }
438
439 private int getRevisionAsInt(final String revisionNumber) {
440 int rev = 0;
441 if (revisionNumber != null && !revisionNumber.equals("0.0")) {
442 rev = Integer.valueOf(revisionNumber).intValue();
443 }
444 return rev;
445 }
446
447 /**
448 * New in StatSVN: Gives the FileBuilder's filename.
449 *
450 * @todo Beef up this interface to better encapsulate the data structure.
451 *
452 * @return the filename
453 */
454 public String getName() {
455 return name;
456 }
457
458 /**
459 * New in StatSVN: The list of revisions made on this file.
460 *
461 * @todo Beef up this interface to better encapsulate the data structure.
462 *
463 * @return the list of revisions on this file
464 */
465 public List getRevisions() {
466 return revisions;
467 }
468
469 /**
470 * New in StatSVN: Returns a particular revision made on this file or
471 * <tt>null</tt> if it doesn't exist.
472 *
473 * @return a particular revision made on this file or <tt>null</tt> if it
474 * doesn't exist.
475 */
476 private RevisionData findRevision(final String revisionNumber) {
477 for (int i = 0; i < revisions.size(); i++) {
478 final RevisionData data = (RevisionData) revisions.get(i);
479 if (data.getRevisionNumber().equals(revisionNumber)) {
480 return data;
481 }
482 }
483 return null;
484 }
485
486 /**
487 * New in StatSVN: Returns <tt>true</tt> if this file is known to be
488 * binary.
489 *
490 * @todo Beef up this interface to better encapsulate the data structure.
491 *
492 * @return <tt>true</tt> if this file is known to be binary,
493 * <tt>false</tt> otherwise.
494 */
495 public synchronized boolean isBinary() {
496 return binary;
497 }
498
499 /**
500 * New in StatSVN: Sets the file's binary flag.
501 *
502 * @todo Beef up this interface to better encapsulate the data structure.
503 *
504 * @param binary
505 * is the file binary?
506 */
507 public synchronized void setBinary(final boolean isBinary) {
508 this.binary = isBinary;
509 }
510
511 /**
512 * New in StatSVN: Updates a particular revision with new line count
513 * information. If the file or revision does not exist, action will do
514 * nothing.
515 *
516 * Necessary because line counts are not given in the log file and hence can
517 * only be added in a second pass.
518 *
519 * @param revisionNumber
520 * the revision number to be updated
521 * @param linesAdded
522 * the lines that were added
523 * @param linesRemoved
524 * the lines that were removed
525 */
526 public void updateRevision(final String revisionNumber, final int linesAdded, final int linesRemoved) {
527 final RevisionData data = findRevision(revisionNumber);
528 if (data != null) {
529 data.setLines(linesAdded, linesRemoved);
530 }
531 }
532
533 }