View Javadoc

1   /*
2    StatCvs - CVS statistics generation 
3    Copyright (C) 2002  Lukasz Pekacki <lukasz@pekacki.de>
4    http://statcvs.sf.net/
5    
6    This library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
10  
11   This library is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   Lesser General Public License for more details.
15  
16   You should have received a copy of the GNU Lesser General Public
17   License along with this library; if not, write to the Free Software
18   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19   
20   $RCSfile: FileBuilder.java,v $
21   $Date: 2005/03/29 22:45:06 $
22   */
23  package net.sf.statsvn.input;
24  
25  import java.io.IOException;
26  import java.util.ArrayList;
27  import java.util.Date;
28  import java.util.Iterator;
29  import java.util.List;
30  import java.util.ListIterator;
31  import java.util.Map;
32  import java.util.SortedSet;
33  import java.util.TreeSet;
34  
35  import net.sf.statcvs.input.NoLineCountException;
36  import net.sf.statcvs.model.Revision;
37  import net.sf.statcvs.model.SymbolicName;
38  import net.sf.statcvs.model.VersionedFile;
39  import net.sf.statsvn.output.SvnConfigurationOptions;
40  
41  /**
42   * <p>
43   * Builds a {@link VersionedFile} with {@link Revision}s from logging data.
44   * This class is responsible for deciding if a file or revisions will be
45   * included in the report, for translating from CVS logfile data structures to
46   * the data structures in the <tt>net.sf.statsvn.model</tt> package, and for
47   * calculating the LOC history for the file.
48   * </p>
49   * 
50   * <p>
51   * A main goal of this class is to delay the creation of the
52   * <tt>VersionedFile</tt> object until all revisions of the file have been
53   * collected from the log. We could simply create <tt>VersionedFile</tt> and
54   * <tt>Revision</tt>s on the fly as we parse through the log, but this
55   * creates a problem if we decide not to include the file after reading several
56   * revisions. The creation of a <tt>VersionedFile</tt> or <tt>Revision</tt>
57   * can cause many more objects to be created (<tt>Author</tt>,
58   * <tt>Directory</tt>, <tt>Commit</tt>), and it would be very hard to get
59   * rid of them if we don't want the file. This problem is solved by first
60   * collecting all information about one file in this class, and then, with all
61   * information present, deciding if we want to create the model instances or
62   * not.
63   * </p>
64   * 
65   * @author Richard Cyganiak <richard@cyganiak.de>
66   * @author Tammo van Lessen
67   * @author Jason Kealey <jkealey@shade.ca>
68   * @author Gunter Mussbacher <gunterm@site.uottawa.ca>
69   * @version $Id: FileBuilder.java 351 2008-03-28 18:46:26Z benoitx $
70   */
71  public class FileBuilder {
72  	private static final int ONE_SECOND = 1000;
73  
74  	private static final int ONE_MIN_IN_MS = 60000;
75  
76  	private final Builder builder;
77  
78  	private final String name;
79  
80  	private boolean binary;
81  
82  	private final List revisions = new ArrayList();
83  
84  	private final Map revBySymnames;
85  
86  	private final Map dateBySymnames;
87  
88  	private int locDelta;
89  
90  	/**
91  	 * Creates a new <tt>FileBuilder</tt>.
92  	 * 
93  	 * @param builder
94  	 *            a <tt>Builder</tt> that provides factory services for author
95  	 *            and directory instances and line counts.
96  	 * @param name
97  	 *            the filename
98  	 * @param binary
99  	 *            Is this a binary file or not?
100 	 */
101 	public FileBuilder(final Builder builder, final String name, final boolean isBinary, final Map revBySymnames, final Map dateBySymnames) {
102 		this.builder = builder;
103 		this.name = name;
104 		this.binary = isBinary;
105 		this.revBySymnames = revBySymnames;
106 		this.dateBySymnames = dateBySymnames;
107 
108 		SvnConfigurationOptions.getTaskLogger().log("logging " + name);
109 	}
110 
111 	/**
112 	 * Adds a revision to the file. The revisions must be added in the same
113 	 * order as they appear in the CVS logfile, that is, most recent first.
114 	 * 
115 	 * @param data
116 	 *            the revision
117 	 */
118 	public void addRevisionData(final RevisionData data) {
119 		if (binary && !data.isCreationOrRestore()) {
120 			data.setLines(0, 0);
121 		}
122 		this.revisions.add(data);
123 
124 		locDelta += getLOCChange(data);
125 	}
126 
127 	/**
128 	 * Creates and returns a {@link VersionedFile} representation of the file.
129 	 * <tt>null</tt> is returned if the file does not meet certain criteria,
130 	 * for example if its filename meets an exclude filter or if it was dead
131 	 * during the entire logging timespan.
132 	 * 
133 	 * @param beginOfLogDate
134 	 *            the date of the begin of the log
135 	 * @return a <tt>VersionedFile</tt> representation of the file.
136 	 */
137 	public VersionedFile createFile(final Date beginOfLogDate) {
138 		if (isFilteredFile() || !fileExistsInLogPeriod()) {
139 			return null;
140 		}
141 
142 		final VersionedFile file = new VersionedFile(name, builder.getDirectory(name));
143 
144 		if (revisions.isEmpty()) {
145 			buildBeginOfLogRevision(file, beginOfLogDate, getFinalLOC(), null);
146 			return file;
147 		}
148 
149 		final Iterator it = revisions.iterator();
150 		RevisionData currentData = (RevisionData) it.next();
151 		int currentLOC = getFinalLOC();
152 		RevisionData previousData;
153 		int previousLOC;
154 		SortedSet symbolicNames;
155 
156 		while (it.hasNext()) {
157 			previousData = currentData;
158 			previousLOC = currentLOC;
159 			currentData = (RevisionData) it.next();
160 			currentLOC = previousLOC - getLOCChange(previousData);
161 
162 			// symbolic names for previousData
163 			symbolicNames = createSymbolicNamesCollection(previousData);
164 
165 			if (previousData.isCreationOrRestore() || previousData.isChange() || isBinary()) {
166 				if (currentData.isDeletion()) {
167 					buildCreationRevision(file, previousData, previousLOC, symbolicNames);
168 				} else {
169 					buildChangeRevision(file, previousData, previousLOC, symbolicNames);
170 				}
171 			} else if (previousData.isDeletion()) {
172 				buildDeletionRevision(file, previousData, previousLOC, symbolicNames);
173 			} else {
174 				SvnConfigurationOptions.getTaskLogger().info("illegal state in " + file.getFilenameWithPath() + ":" + previousData.getRevisionNumber());
175 			}
176 		}
177 
178 		// symbolic names for currentData
179 		symbolicNames = createSymbolicNamesCollection(currentData);
180 
181 		final int nextLinesOfCode = currentLOC - getLOCChange(currentData);
182 		if (currentData.isCreationOrRestore()) {
183 			buildCreationRevision(file, currentData, currentLOC, symbolicNames);
184 		} else if (currentData.isDeletion()) {
185 			buildDeletionRevision(file, currentData, currentLOC, symbolicNames);
186 			buildBeginOfLogRevision(file, beginOfLogDate, nextLinesOfCode, symbolicNames);
187 		} else if (currentData.isChange()) {
188 			buildChangeRevision(file, currentData, currentLOC, symbolicNames);
189 			currentData.setDate(new Date(currentData.getDate().getTime() - ONE_SECOND));
190 			buildCreationRevision(file, currentData, 0, symbolicNames);
191 			buildBeginOfLogRevision(file, beginOfLogDate, nextLinesOfCode, symbolicNames);
192 		} else {
193 			SvnConfigurationOptions.getTaskLogger().info("illegal state in " + file.getFilenameWithPath() + ":" + currentData.getRevisionNumber());
194 		}
195 		return file;
196 	}
197 
198 	/**
199 	 * Gets a LOC count for the file's most recent revision. If the file exists
200 	 * in the local checkout, we ask the {@link RepositoryFileManager} to count
201 	 * its lines of code. If not (that is, it is dead), return an approximated
202 	 * LOC value for its last non-dead revision.
203 	 * 
204 	 * @return the LOC count for the file's most recent revision.
205 	 */
206 	private int getFinalLOC() {
207 		if (binary) {
208 			return 0;
209 		}
210 
211 		String revision = null;
212 		try {
213 			revision = builder.getRevision(name);
214 		} catch (final IOException e) {
215 			if (!finalRevisionIsDead()) {
216 				SvnConfigurationOptions.getTaskLogger().info(e.getMessage());
217 			}
218 		}
219 
220 		try {
221 			// if ("1.1".equals(revision)) {
222 			// return builder.getLOC(name) + locDelta;
223 			// } else {
224 			if (!revisions.isEmpty()) {
225 				final RevisionData firstAdded = (RevisionData) revisions.get(0);
226 				if (!finalRevisionIsDead() && !firstAdded.getRevisionNumber().equals(revision)) {
227 					SvnConfigurationOptions.getTaskLogger().info("Revision of " + name + " does not match expected revision");
228 				}
229 			}
230 			return builder.getLOC(name);
231 			// }
232 		} catch (final NoLineCountException e) {
233 			if (!finalRevisionIsDead()) {
234 				SvnConfigurationOptions.getTaskLogger().info(e.getMessage());
235 			}
236 			return approximateFinalLOC();
237 		}
238 	}
239 
240 	/**
241 	 * Returns <tt>true</tt> if the file's most recent revision is dead.
242 	 * 
243 	 * @return <tt>true</tt> if the file is dead.
244 	 */
245 	protected boolean finalRevisionIsDead() {
246 		if (revisions.isEmpty()) {
247 			return false;
248 		}
249 		return ((RevisionData) revisions.get(0)).isDeletion();
250 	}
251 
252 	/**
253 	 * Returns <tt>true</tt> if the file has revisions.
254 	 * 
255 	 * @return Returns <tt>true</tt> if the file has revisions.
256 	 */
257 	public boolean existRevision() {
258 		return !revisions.isEmpty();
259 	}
260 
261 	/**
262 	 * Approximates the LOC count for files that are not present in the local
263 	 * checkout. If a file was deleted at some point in history, then we can't
264 	 * count its final lines of code. This algorithm calculates a lower bound
265 	 * for the file's LOC prior to deletion by following the ups and downs of
266 	 * the revisions.
267 	 * 
268 	 * @return a lower bound for the file's LOC before it was deleted
269 	 */
270 	private int approximateFinalLOC() {
271 		int max = 0;
272 		int current = 0;
273 		final Iterator it = revisions.iterator();
274 		while (it.hasNext()) {
275 			final RevisionData data = (RevisionData) it.next();
276 			current += data.getLinesAdded();
277 			max = Math.max(current, max);
278 			current -= data.getLinesRemoved();
279 		}
280 		return max;
281 	}
282 
283 	/**
284 	 * Returns the change in LOC count caused by a revision. If there were 10
285 	 * lines added and 3 lines removed, 7 would be returned. This does not take
286 	 * into account file deletion and creation.
287 	 * 
288 	 * @param data
289 	 *            a revision
290 	 * @return the change in LOC count
291 	 */
292 	private int getLOCChange(final RevisionData data) {
293 		return data.getLinesAdded() - data.getLinesRemoved();
294 	}
295 
296 	private void buildCreationRevision(final VersionedFile file, final RevisionData data, final int loc, final SortedSet symbolicNames) {
297 		file.addInitialRevision(data.getRevisionNumber(), builder.getAuthor(data.getLoginName()), data.getDate(), data.getComment(), loc, symbolicNames);
298 	}
299 
300 	private void buildChangeRevision(final VersionedFile file, final RevisionData data, final int loc, final SortedSet symbolicNames) {
301 		file.addChangeRevision(data.getRevisionNumber(), builder.getAuthor(data.getLoginName()), data.getDate(), data.getComment(), loc, data.getLinesAdded()
302 		        - data.getLinesRemoved(), Math.min(data.getLinesAdded(), data.getLinesRemoved()), symbolicNames);
303 	}
304 
305 	private void buildDeletionRevision(final VersionedFile file, final RevisionData data, final int loc, final SortedSet symbolicNames) {
306 		file.addDeletionRevision(data.getRevisionNumber(), builder.getAuthor(data.getLoginName()), data.getDate(), data.getComment(), loc, symbolicNames);
307 	}
308 
309 	private void buildBeginOfLogRevision(final VersionedFile file, final Date beginOfLogDate, final int loc, final SortedSet symbolicNames) {
310 		final Date date = new Date(beginOfLogDate.getTime() - ONE_MIN_IN_MS);
311 		final Revision dummyForMove = file.addBeginOfLogRevision(date, loc, symbolicNames);
312 
313 		// + BX: DO NOT add a 0.0 revision to this SymbolicNames set as this
314 		// would duplicate the impact of the
315 		// move on the TAG set.
316 		if (symbolicNames != null) {
317 			final Iterator it = symbolicNames.iterator();
318 			while (it.hasNext()) {
319 				((SymbolicName) it.next()).getRevisions().remove(dummyForMove);
320 			}
321 		}
322 	}
323 
324 	/**
325 	 * Takes a filename and checks if it should be processed or not. Can be used
326 	 * to filter out unwanted files.
327 	 * 
328 	 * @return <tt>true</tt> if this file should not be processed
329 	 */
330 	private boolean isFilteredFile() {
331 		return !this.builder.matchesPatterns(this.name);
332 	}
333 
334 	/**
335 	 * Returns <tt>false</tt> if the file did never exist in the timespan
336 	 * covered by the log. For our purposes, a file is non-existant if it has no
337 	 * revisions and does not exists in the module checkout. Note: A file with
338 	 * no revisions must be included in the report if it does exist in the
339 	 * module checkout. This happens if it was created before the log started,
340 	 * and not changed before the log ended.
341 	 * 
342 	 * @return <tt>true</tt> if the file did exist at some point in the log
343 	 *         period.
344 	 */
345 	private boolean fileExistsInLogPeriod() {
346 		if (revisions.size() > 0 || binary) {
347 			return true;
348 		}
349 		try {
350 			builder.getLOC(name);
351 			return true;
352 		} catch (final NoLineCountException fileDoesNotExistInTimespan) {
353 			return false;
354 		}
355 	}
356 
357 	/**
358 	 * Creates a sorted set containing all symbolic name objects affected by
359 	 * this revision. If this revision has no symbolic names, this method
360 	 * returns null.
361 	 * 
362 	 * @param revisionData
363 	 *            this revision
364 	 * @return the sorted set or null
365 	 */
366 	private SortedSet createSymbolicNamesCollection(final RevisionData revisionData) {
367 		SortedSet symbolicNames = null;
368 
369 		final int currentRevision = getRevisionAsInt(revisionData.getRevisionNumber());
370 		SvnConfigurationOptions.getTaskLogger().log("\n" + name + " CURRENT REVISION = " + currentRevision + " Deleted " + revisionData.isDeletion());
371 
372 		if (revisions.isEmpty()) {
373 			SvnConfigurationOptions.getTaskLogger().log("NO Revisions....");
374 			return symbolicNames;
375 		}
376 
377 		// go through each possible tag
378 		for (final Iterator tags = revBySymnames.entrySet().iterator(); tags.hasNext();) {
379 			final Map.Entry tag = (Map.Entry) tags.next();
380 
381 			final int tagRevision = getRevisionAsInt((String) tag.getValue());
382 
383 			SvnConfigurationOptions.getTaskLogger().log("Considering tag REV " + tagRevision + " name=" + tag.getKey());
384 
385 			// go through the revisions for this file
386 			// in order to find either the rev ON the tag or JUST BEFORE!
387 			int previousRevisionForThisFile = getRevisionAsInt(((RevisionData) revisions.get(revisions.size() - 1)).getRevisionNumber());
388 			int revisionToTag = -1;
389 			for (final ListIterator it = revisions.listIterator(revisions.size()); it.hasPrevious();) {
390 				final RevisionData data = (RevisionData) it.previous();
391 
392 				SvnConfigurationOptions.getTaskLogger().log(
393 				        "File REV " + data.getRevisionNumber() + " =>" + data.getDate() + " vs " + tagRevision + " Deletion:" + data.isDeletion());
394 
395 				final int dataRev = getRevisionAsInt(data.getRevisionNumber());
396 
397 				if (revisionData.isDeletion() && currentRevision < dataRev) {
398 					// the file is deleted (revisionData.isDeletion) AND the
399 					// currentRevision is BEFORE the current tag
400 					// so we should not tag this.
401 					previousRevisionForThisFile = getRevisionAsInt(data.getRevisionNumber());
402 					continue;
403 				} else if (dataRev == tagRevision) {
404 					revisionToTag = tagRevision;
405 					break;
406 				} else if (dataRev > tagRevision && tagRevision >= previousRevisionForThisFile) {
407 					revisionToTag = previousRevisionForThisFile;
408 					SvnConfigurationOptions.getTaskLogger().log("1/ Revision to TAG " + revisionToTag);
409 					break;
410 				}
411 
412 				previousRevisionForThisFile = getRevisionAsInt(data.getRevisionNumber());
413 			}
414 
415 			// if the LAST revision for this fuke is before the TAG revision
416 			// and the file is NOT deleted, then we should tag it!
417 			if (previousRevisionForThisFile < tagRevision && !revisionData.isDeletion()) {
418 				revisionToTag = previousRevisionForThisFile;
419 				SvnConfigurationOptions.getTaskLogger().log("2/ Revision to TAG " + revisionToTag);
420 			}
421 
422 			SvnConfigurationOptions.getTaskLogger().log("Revision to TAG " + revisionToTag);
423 
424 			if (revisionToTag > 0 && revisionToTag == currentRevision) {
425 				// previous revision is the last one for this tag
426 				if (symbolicNames == null) {
427 					symbolicNames = new TreeSet();
428 				}
429 				SvnConfigurationOptions.getTaskLogger().log(
430 				        "adding revision " + name + "," + currentRevision + " to symname " + tag.getKey() + " Date:" + dateBySymnames.get(tag.getKey()) + " A:"
431 				                + revisionData.getLinesAdded() + " R:" + revisionData.getLinesRemoved());
432 				symbolicNames.add(builder.getSymbolicName((String) tag.getKey(), (Date) dateBySymnames.get(tag.getKey())));
433 			}
434 		}
435 
436 		return symbolicNames;
437 	}
438 
439 	private int getRevisionAsInt(final String revisionNumber) {
440 		int rev = 0;
441 		if (revisionNumber != null && !revisionNumber.equals("0.0")) {
442 			rev = Integer.valueOf(revisionNumber).intValue();
443 		}
444 		return rev;
445 	}
446 
447 	/**
448 	 * New in StatSVN: Gives the FileBuilder's filename.
449 	 * 
450 	 * @todo Beef up this interface to better encapsulate the data structure.
451 	 * 
452 	 * @return the filename
453 	 */
454 	public String getName() {
455 		return name;
456 	}
457 
458 	/**
459 	 * New in StatSVN: The list of revisions made on this file.
460 	 * 
461 	 * @todo Beef up this interface to better encapsulate the data structure.
462 	 * 
463 	 * @return the list of revisions on this file
464 	 */
465 	public List getRevisions() {
466 		return revisions;
467 	}
468 
469 	/**
470 	 * New in StatSVN: Returns a particular revision made on this file or
471 	 * <tt>null</tt> if it doesn't exist.
472 	 * 
473 	 * @return a particular revision made on this file or <tt>null</tt> if it
474 	 *         doesn't exist.
475 	 */
476 	private RevisionData findRevision(final String revisionNumber) {
477 		for (int i = 0; i < revisions.size(); i++) {
478 			final RevisionData data = (RevisionData) revisions.get(i);
479 			if (data.getRevisionNumber().equals(revisionNumber)) {
480 				return data;
481 			}
482 		}
483 		return null;
484 	}
485 
486 	/**
487 	 * New in StatSVN: Returns <tt>true</tt> if this file is known to be
488 	 * binary.
489 	 * 
490 	 * @todo Beef up this interface to better encapsulate the data structure.
491 	 * 
492 	 * @return <tt>true</tt> if this file is known to be binary,
493 	 *         <tt>false</tt> otherwise.
494 	 */
495 	public synchronized boolean isBinary() {
496 		return binary;
497 	}
498 
499 	/**
500 	 * New in StatSVN: Sets the file's binary flag.
501 	 * 
502 	 * @todo Beef up this interface to better encapsulate the data structure.
503 	 * 
504 	 * @param binary
505 	 *            is the file binary?
506 	 */
507 	public synchronized void setBinary(final boolean isBinary) {
508 		this.binary = isBinary;
509 	}
510 
511 	/**
512 	 * New in StatSVN: Updates a particular revision with new line count
513 	 * information. If the file or revision does not exist, action will do
514 	 * nothing.
515 	 * 
516 	 * Necessary because line counts are not given in the log file and hence can
517 	 * only be added in a second pass.
518 	 * 
519 	 * @param revisionNumber
520 	 *            the revision number to be updated
521 	 * @param linesAdded
522 	 *            the lines that were added
523 	 * @param linesRemoved
524 	 *            the lines that were removed
525 	 */
526 	public void updateRevision(final String revisionNumber, final int linesAdded, final int linesRemoved) {
527 		final RevisionData data = findRevision(revisionNumber);
528 		if (data != null) {
529 			data.setLines(linesAdded, linesRemoved);
530 		}
531 	}
532 
533 }