View Javadoc

1   /*
2    StatCvs - CVS statistics generation 
3    Copyright (C) 2002  Lukasz Pekacki <lukasz@pekacki.de>
4    http://statcvs.sf.net/
5    
6    This library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
10  
11   This library is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   Lesser General Public License for more details.
15  
16   You should have received a copy of the GNU Lesser General Public
17   License along with this library; if not, write to the Free Software
18   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19   
20   $RCSfile: SvnLogfileParser.java,v $ 
21   Created on $Date: 2004/10/10 11:29:07 $ 
22   */
23  
24  package net.sf.statsvn.input;
25  
26  import java.io.FileInputStream;
27  import java.io.FileNotFoundException;
28  import java.io.IOException;
29  import java.io.InputStream;
30  import java.util.ArrayList;
31  import java.util.Collection;
32  import java.util.Collections;
33  import java.util.Date;
34  import java.util.HashMap;
35  import java.util.HashSet;
36  import java.util.Iterator;
37  import java.util.List;
38  import java.util.Map;
39  import java.util.Vector;
40  
41  import javax.xml.parsers.ParserConfigurationException;
42  import javax.xml.parsers.SAXParser;
43  import javax.xml.parsers.SAXParserFactory;
44  
45  import net.sf.statcvs.input.LogSyntaxException;
46  import net.sf.statsvn.output.SvnConfigurationOptions;
47  import net.sf.statsvn.util.BinaryDiffException;
48  import net.sf.statsvn.util.FilenameComparator;
49  import net.sf.statsvn.util.SvnDiffUtils;
50  import net.sf.statsvn.util.XMLUtil;
51  
52  import org.xml.sax.SAXException;
53  
54  import edu.emory.mathcs.backport.java.util.concurrent.ExecutorService;
55  import edu.emory.mathcs.backport.java.util.concurrent.Executors;
56  import edu.emory.mathcs.backport.java.util.concurrent.TimeUnit;
57  
58  /**
59   * Parses a Subversion logfile and does post-parse processing. A {@link Builder}
60   * must be specified which does the construction work.
61   * 
62   * @author Jason Kealey <jkealey@shade.ca>
63   * @author Gunter Mussbacher <gunterm@site.uottawa.ca>
64   * 
65   * @version $Id: SvnLogfileParser.java 368 2008-06-25 21:23:46Z benoitx $
66   */
67  public class SvnLogfileParser {
68      private static final int INTERMEDIARY_SAVE_INTERVAL_MS = 120000;
69  
70      private static final String REPOSITORIES_XML = "repositories.xml";
71  
72      private final SvnLogBuilder builder;
73  
74      private final InputStream logFile;
75  
76      private final RepositoryFileManager repositoryFileManager;
77  
78      private CacheBuilder cacheBuilder;
79  
80      private HashSet revsForNewDiff = null;
81  
82      /**
83       * Default Constructor
84       * 
85       * @param repositoryFileManager
86       *            the repository file manager
87       * @param logFile
88       *            a <tt>Reader</tt> containing the SVN logfile
89       * @param builder
90       *            the builder that will process the log information
91       */
92      public SvnLogfileParser(final RepositoryFileManager repositoryFileManager, final InputStream logFile, final SvnLogBuilder builder) {
93          this.logFile = logFile;
94          this.builder = builder;
95          this.repositoryFileManager = repositoryFileManager;
96      }
97  
98      /**
99       * Because the log file does not contain the lines added or removed in a
100      * commit, and because the logfile contains implicit actions (@link
101      * #verifyImplicitActions()), we must query the repository for line
102      * differences. This method uses the (@link LineCountsBuilder) to load the
103      * persisted information and (@link SvnDiffUtils) to find new information.
104      * 
105      * @param factory
106      *            the factory used to create SAX parsers.
107      * @throws IOException
108      */
109     protected void handleLineCounts(final SAXParserFactory factory) throws IOException {
110         long startTime = System.currentTimeMillis();
111         final String xmlFile = SvnConfigurationOptions.getCacheDir() + REPOSITORIES_XML;
112 
113         final RepositoriesBuilder repositoriesBuilder = readAndParseXmlFile(factory, xmlFile);
114         cacheFileName = SvnConfigurationOptions.getCacheDir() + repositoriesBuilder.getFileName(repositoryFileManager.getRepositoryUuid());
115         XMLUtil.writeXmlFile(repositoriesBuilder.getDocument(), xmlFile);
116         SvnConfigurationOptions.getTaskLogger().log("parsing repositories finished in " + (System.currentTimeMillis() - startTime) + " ms.");
117         startTime = System.currentTimeMillis();
118 
119         readCache(factory);
120         SvnConfigurationOptions.getTaskLogger().log("parsing line counts finished in " + (System.currentTimeMillis() - startTime) + " ms.");
121         startTime = System.currentTimeMillis();
122 
123         // update the cache xml file with the latest binary status information
124         // from the working copy
125         cacheBuilder.updateBinaryStatus(builder.getFileBuilders().values(), repositoryFileManager.getRootRevisionNumber());
126 
127         final Collection fileBuilders = builder.getFileBuilders().values();
128 
129         calculateNumberRequiredCalls(fileBuilders);
130 
131         // concurrency
132         ExecutorService poolService = null;
133         if (SvnConfigurationOptions.getNumberSvnDiffThreads() > 1) {
134             poolService = Executors.newFixedThreadPool(SvnConfigurationOptions.getNumberSvnDiffThreads());
135         }
136 
137         boolean isFirstDiff = true;
138         calls = 0;
139         groupStart = System.currentTimeMillis();
140         boolean poolUseRequired = false;
141 
142         if (SvnConfigurationOptions.isLegacyDiff()) {
143             for (final Iterator iter = fileBuilders.iterator(); iter.hasNext();) {
144                 final FileBuilder fileBuilder = (FileBuilder) iter.next();
145                 final String fileName = fileBuilder.getName();
146                 if (fileBuilder.isBinary() || !builder.matchesPatterns(fileName)) {
147                     continue;
148                 }
149                 final List revisions = fileBuilder.getRevisions();
150                 for (int i = 0; i < revisions.size(); i++) {
151                     // line diffs are expensive operations. therefore, the
152                     // result is
153                     // stored in the
154                     // cacheBuilder and eventually persisted in the cache xml
155                     // file.
156                     // the next time
157                     // the file is read the line diffs (or 0/0 in case of binary
158                     // files) are intialized
159                     // in the RevisionData. this cause hasNoLines to be false
160                     // which
161                     // in turn causes the
162                     // if clause below to be skipped.
163                     if (i + 1 < revisions.size() && ((RevisionData) revisions.get(i)).hasNoLines() && !((RevisionData) revisions.get(i)).isDeletion()) {
164                         if (((RevisionData) revisions.get(i + 1)).isDeletion()) {
165                             continue;
166                         }
167                         final String revNrNew = ((RevisionData) revisions.get(i)).getRevisionNumber();
168                         if (cacheBuilder.isBinary(fileName, revNrNew)) {
169                             continue;
170                         }
171                         final String revNrOld = ((RevisionData) revisions.get(i + 1)).getRevisionNumber();
172 
173                         if (isFirstDiff) {
174                             SvnConfigurationOptions.getTaskLogger().info("Contacting server to obtain line count information.");
175                             SvnConfigurationOptions.getTaskLogger().info(
176                                     "This information will be cached so that the next time you run StatSVN, results will be returned more quickly.");
177 
178                             if (SvnConfigurationOptions.isLegacyDiff()) {
179                                 SvnConfigurationOptions.getTaskLogger().info("Using the legacy Subversion 1.3 diff mechanism: one diff per file per revision.");
180                             } else {
181                                 SvnConfigurationOptions.getTaskLogger().info("Using the Subversion 1.4 diff mechanism: one diff per revision.");
182                             }
183 
184                             isFirstDiff = false;
185                         }
186 
187                         final DiffTask diff = new DiffTask(fileName, revNrNew, revNrOld, fileBuilder);
188 
189                         // SvnConfigurationOptions.getTaskLogger().log(Thread.currentThread().getName()
190                         // + " Schedule task for " + fileName + " rev:" +
191                         // revNrNew);
192 
193                         poolUseRequired = executeTask(poolService, poolUseRequired, diff);
194                     }
195                 }
196             }
197         } else {
198             for (final Iterator iter = revsForNewDiff.iterator(); iter.hasNext();) {
199                 final String revNrNew = (String) iter.next();
200                 final PerRevDiffTask diff = new PerRevDiffTask(revNrNew, builder.getFileBuilders());
201 
202                 poolUseRequired = executeTask(poolService, poolUseRequired, diff);
203             }
204 
205         }
206         waitForPoolIfRequired(poolService);
207         SvnConfigurationOptions.getTaskLogger().log("parsing svn diff");
208         XMLUtil.writeXmlFile(cacheBuilder.getDocument(), cacheFileName);
209         SvnConfigurationOptions.getTaskLogger().log("parsing svn diff finished in " + (System.currentTimeMillis() - startTime) + " ms.");
210     }
211 
212     private boolean executeTask(final ExecutorService poolService, boolean poolUseRequired, final DiffTask diff) {
213         if (poolUseRequired && SvnConfigurationOptions.getNumberSvnDiffThreads() > 1) {
214             poolService.execute(diff);
215         } else {
216             final long start = System.currentTimeMillis();
217             diff.run();
218             final long end = System.currentTimeMillis();
219             poolUseRequired = (end - start) > SvnConfigurationOptions.getThresholdInMsToUseConcurrency();
220         }
221         return poolUseRequired;
222     }
223 
224     private void waitForPoolIfRequired(final ExecutorService poolService) {
225         if (SvnConfigurationOptions.getNumberSvnDiffThreads() > 1 && poolService != null) {
226             SvnConfigurationOptions.getTaskLogger().info(
227                     "Scheduled " + requiredDiffCalls + " svn diff calls on " + Math.min(requiredDiffCalls, SvnConfigurationOptions.getNumberSvnDiffThreads())
228                             + " threads.");
229             poolService.shutdown();
230             try {
231                 SvnConfigurationOptions.getTaskLogger().log("================ Wait for completion =========================");
232                 if (!poolService.awaitTermination(2, TimeUnit.DAYS)) {
233                     SvnConfigurationOptions.getTaskLogger().log("================ TIME OUT!!! =========================");
234                 }
235             } catch (final InterruptedException e) {
236                 SvnConfigurationOptions.getTaskLogger().error(e.toString());
237             }
238         }
239     }
240 
241     private void calculateNumberRequiredCalls(final Collection fileBuilders) {
242         // Calculate the number of required calls...
243         requiredDiffCalls = 0;
244 
245         if (!SvnConfigurationOptions.isLegacyDiff()) {
246             revsForNewDiff = new HashSet();
247         }
248 
249         for (final Iterator iter = fileBuilders.iterator(); iter.hasNext();) {
250             final FileBuilder fileBuilder = (FileBuilder) iter.next();
251             final String fileName = fileBuilder.getName();
252             if (!fileBuilder.isBinary() && builder.matchesPatterns(fileName)) {
253                 final List revisions = fileBuilder.getRevisions();
254                 for (int i = 0; i < revisions.size(); i++) {
255                     if (i + 1 < revisions.size() && ((RevisionData) revisions.get(i)).hasNoLines() && !((RevisionData) revisions.get(i)).isDeletion()) {
256                         if (((RevisionData) revisions.get(i + 1)).isDeletion()) {
257                             continue;
258                         }
259                         final String revNrNew = ((RevisionData) revisions.get(i)).getRevisionNumber();
260                         if (cacheBuilder.isBinary(fileName, revNrNew)) {
261                             continue;
262                         }
263                         // count if legacy diff or this rev wasn't already
264                         // counted.
265                         if (revsForNewDiff == null || !revsForNewDiff.contains(revNrNew)) {
266                             requiredDiffCalls++;
267 
268                             if (revsForNewDiff != null) {
269                                 revsForNewDiff.add(revNrNew);
270                             }
271                         }
272                     }
273                 }
274             }
275         }
276         // END Calculate the number of required calls...
277     }
278 
279     private void readCache(final SAXParserFactory factory) throws IOException {
280         cacheBuilder = new CacheBuilder(builder, repositoryFileManager);
281         FileInputStream cacheFile = null;
282         try {
283             cacheFile = new FileInputStream(cacheFileName);
284             final SAXParser parser = factory.newSAXParser();
285             parser.parse(cacheFile, new SvnXmlCacheFileHandler(cacheBuilder));
286             cacheFile.close();
287         } catch (final ParserConfigurationException e) {
288             SvnConfigurationOptions.getTaskLogger().error("Cache: " + e.toString());
289         } catch (final SAXException e) {
290             SvnConfigurationOptions.getTaskLogger().error("Cache: " + e.toString());
291         } catch (final FileNotFoundException e) {
292             SvnConfigurationOptions.getTaskLogger().log("Cache: " + e.toString());
293         } catch (final IOException e) {
294             SvnConfigurationOptions.getTaskLogger().error("Cache: " + e.toString());
295         } finally {
296             if (cacheFile != null) {
297                 cacheFile.close();
298             }
299         }
300     }
301 
302     private RepositoriesBuilder readAndParseXmlFile(final SAXParserFactory factory, final String xmlFile) throws IOException {
303         final RepositoriesBuilder repositoriesBuilder = new RepositoriesBuilder();
304         FileInputStream repositoriesFile = null;
305         try {
306             repositoriesFile = new FileInputStream(xmlFile);
307             final SAXParser parser = factory.newSAXParser();
308             parser.parse(repositoriesFile, new SvnXmlRepositoriesFileHandler(repositoriesBuilder));
309             repositoriesFile.close();
310         } catch (final ParserConfigurationException e) {
311             SvnConfigurationOptions.getTaskLogger().error("Repositories: " + e.toString());
312         } catch (final SAXException e) {
313             SvnConfigurationOptions.getTaskLogger().error("Repositories: " + e.toString());
314         } catch (final FileNotFoundException e) {
315             SvnConfigurationOptions.getTaskLogger().log("Repositories: " + e.toString());
316         } catch (final IOException e) {
317             SvnConfigurationOptions.getTaskLogger().error("Repositories: " + e.toString());
318         } finally {
319             if (repositoriesFile != null) {
320                 repositoriesFile.close();
321             }
322         }
323         return repositoriesBuilder;
324     }
325 
326     /**
327      * Parses the logfile. After <tt>parse()</tt> has finished, the result of
328      * the parsing process can be obtained from the builder.
329      * 
330      * @throws LogSyntaxException
331      *             if syntax errors in log
332      * @throws IOException
333      *             if errors while reading from the log Reader
334      */
335     public void parse() throws LogSyntaxException, IOException {
336 
337         final SAXParserFactory factory = parseSvnLog();
338 
339         verifyImplicitActions();
340 
341         // must be after verifyImplicitActions();
342         removeDirectories();
343 
344         handleLineCounts(factory);
345 
346     }
347 
348     /**
349      * The svn log can contain deletions of directories which imply that all of
350      * its contents have been deleted.
351      * 
352      * Furthermore, the svn log can contain entries which are copies from other
353      * directories (additions or replacements; I haven't seen modifications with
354      * this property, but am not 100% sure) meaning that all files from the
355      * other directory are copied here. We currently do not go back through
356      * copies, so we must infer what files <i>could</i> have been added during
357      * those copies.
358      * 
359      */
360     protected void verifyImplicitActions() {
361         // this method most certainly has issues with implicit actions on root
362         // folder.
363 
364         final long startTime = System.currentTimeMillis();
365         SvnConfigurationOptions.getTaskLogger().log("verifying implicit actions ...");
366 
367         final HashSet implicitActions = new HashSet();
368 
369         // get all filenames
370         final ArrayList files = new ArrayList();
371         final Collection fileBuilders = fetchAllFileNames(files);
372 
373         // sort them so that folders are immediately followed by the folder
374         // entries and then by other files which are prefixed by the folder
375         // name.
376         Collections.sort(files, new FilenameComparator());
377 
378         // for each file
379         for (int i = 0; i < files.size(); i++) {
380             final String parent = files.get(i).toString();
381             final FileBuilder parentBuilder = (FileBuilder) builder.getFileBuilders().get(parent);
382             // check to see if there are files that indicate that parent is a
383             // folder.
384             for (int j = i + 1; j < files.size() && files.get(j).toString().indexOf(parent + "/") == 0; j++) {
385                 // we might not know that it was a folder.
386                 repositoryFileManager.addDirectory(parent);
387 
388                 final String child = files.get(j).toString();
389                 final FileBuilder childBuilder = (FileBuilder) builder.getFileBuilders().get(child);
390                 // for all revisions in the the parent folder
391                 for (final Iterator iter = parentBuilder.getRevisions().iterator(); iter.hasNext();) {
392                     final RevisionData parentData = (RevisionData) iter.next();
393                     int parentRevision;
394                     try {
395                         parentRevision = Integer.parseInt(parentData.getRevisionNumber());
396                     } catch (final Exception e) {
397                         continue;
398                     }
399 
400                     // ignore modifications to folders
401                     if (parentData.isCreationOrRestore() || parentData.isDeletion()) {
402                         int k;
403 
404                         // check to see if the parent revision is an implicit
405                         // action acting on the child.
406                         k = detectActionOnChildGivenActionOnParent(childBuilder, parentRevision);
407 
408                         // we found something to insert
409                         if (k < childBuilder.getRevisions().size()) {
410                             createImplicitAction(implicitActions, child, childBuilder, parentData, k);
411                         }
412                     }
413                 }
414             }
415         }
416 
417         // Some implicit revisions may have resulted in double deletion
418         // (e.g. deleting a directory and THEN deleting the parent directory).
419         // this will get rid of any consecutive deletion.
420         cleanPotentialDuplicateImplicitActions(fileBuilders);
421 
422         // in the preceeding block, we add implicit additions to too may files.
423         // possibly a folder was deleted and restored later on, without the
424         // specific file being re-added. we get rid of those here. however,
425         // without knowledge of what was copied during the implicit additions /
426         // replacements, we will remove as many implicit actions as possible
427         // 
428         // this solution is imperfect.
429 
430         // Examples:
431         // IA ID IA ID M A -> ID M A
432         // IA ID A D M A -> ID A D M A
433         removePotentialInconsistencies(implicitActions, fileBuilders);
434         SvnConfigurationOptions.getTaskLogger().log("verifying implicit actions finished in " + (System.currentTimeMillis() - startTime) + " ms.");
435     }
436 
437     private void createImplicitAction(final HashSet implicitActions, final String child, final FileBuilder childBuilder, final RevisionData parentData,
438             final int k) {
439         // we want to memorize this implicit action.
440         final RevisionData implicit = parentData.createCopy();
441         implicitActions.add(implicit);
442 
443         // avoid concurrent modification errors.
444         final List toMove = new ArrayList();
445         for (final Iterator it = childBuilder.getRevisions().subList(k, childBuilder.getRevisions().size()).iterator(); it.hasNext();) {
446             final RevisionData revToMove = (RevisionData) it.next();
447             // if
448             // (!revToMove.getRevisionNumber().equals(implicit.getRevisionNumber()))
449             // {
450             toMove.add(revToMove);
451             // }
452         }
453 
454         // remove the revisions to be moved.
455         childBuilder.getRevisions().removeAll(toMove);
456 
457         // don't call addRevision directly. buildRevision
458         // does more.
459         builder.buildFile(child, false, false, new HashMap(), new HashMap());
460 
461         // only add the implicit if the last one for the
462         // file is NOT a deletion!
463         // if (!toMove.isEmpty() && !((RevisionData)
464         // toMove.get(0)).isDeletion()) {
465         builder.buildRevision(implicit);
466         // }
467 
468         // copy back the revisions we removed.
469         for (final Iterator it = toMove.iterator(); it.hasNext();) {
470             builder.buildRevision((RevisionData) it.next());
471         }
472     }
473 
474     private int detectActionOnChildGivenActionOnParent(final FileBuilder childBuilder, final int parentRevision) {
475         int k;
476         for (k = 0; k < childBuilder.getRevisions().size(); k++) {
477             final RevisionData childData = (RevisionData) childBuilder.getRevisions().get(k);
478             final int childRevision = Integer.parseInt(childData.getRevisionNumber());
479 
480             // we don't want to add duplicate entries for the
481             // same revision
482             if (parentRevision == childRevision) {
483                 k = childBuilder.getRevisions().size();
484                 break;
485             }
486 
487             if (parentRevision > childRevision) {
488                 break; // we must insert it here!
489             }
490         }
491         return k;
492     }
493 
494     private void removePotentialInconsistencies(final HashSet implicitActions, final Collection fileBuilders) {
495         for (final Iterator iter = fileBuilders.iterator(); iter.hasNext();) {
496             final FileBuilder filebuilder = (FileBuilder) iter.next();
497 
498             // make sure our attic is well set, with our new deletions that we
499             // might have added.
500             if (!repositoryFileManager.existsInWorkingCopy(filebuilder.getName())) {
501                 builder.addToAttic(filebuilder.getName());
502             }
503 
504             // do we detect an inconsistency?
505             if (!repositoryFileManager.existsInWorkingCopy(filebuilder.getName()) && !filebuilder.finalRevisionIsDead()) {
506                 int earliestDelete = -1;
507                 for (int i = 0; i < filebuilder.getRevisions().size(); i++) {
508                     final RevisionData data = (RevisionData) filebuilder.getRevisions().get(i);
509 
510                     if (data.isDeletion()) {
511                         earliestDelete = i;
512                     }
513 
514                     if ((!data.isCreationOrRestore() && data.isChange()) || !implicitActions.contains(data)) {
515                         break;
516                     }
517                 }
518 
519                 if (earliestDelete > 0) {
520                     // avoid concurrent modification errors.
521                     final List toRemove = new ArrayList();
522                     for (final Iterator it = filebuilder.getRevisions().subList(0, earliestDelete).iterator(); it.hasNext();) {
523                         toRemove.add(it.next());
524                     }
525                     filebuilder.getRevisions().removeAll(toRemove);
526                 }
527             }
528         }
529     }
530 
531     private void cleanPotentialDuplicateImplicitActions(final Collection fileBuilders) {
532         for (final Iterator iter = fileBuilders.iterator(); iter.hasNext();) {
533             final FileBuilder filebuilder = (FileBuilder) iter.next();
534 
535             boolean previousIsDelete = false;
536             final List toRemove = new ArrayList();
537             // for this file, iterate through all revisions and store any
538             // deletion revision that follows
539             // a deletion.
540             for (final Iterator it = filebuilder.getRevisions().iterator(); it.hasNext();) {
541                 final RevisionData data = (RevisionData) it.next();
542                 if (data.isDeletion() && previousIsDelete) {
543                     toRemove.add(data);
544                 }
545                 previousIsDelete = data.isDeletion();
546             }
547 
548             // get rid of the duplicate deletion for this file.
549             if (!toRemove.isEmpty()) {
550                 filebuilder.getRevisions().removeAll(toRemove);
551             }
552         }
553     }
554 
555     private Collection fetchAllFileNames(final ArrayList files) {
556         final Collection fileBuilders = builder.getFileBuilders().values();
557         for (final Iterator iter = fileBuilders.iterator(); iter.hasNext();) {
558             final FileBuilder fileBuilder = (FileBuilder) iter.next();
559             files.add(fileBuilder.getName());
560         }
561         return fileBuilders;
562     }
563 
564     /**
565      * We have created FileBuilders for directories because we needed the
566      * information to be able to find implicit actions. However, we don't want
567      * to query directories for their line counts later on. Therefore, we must
568      * remove them here.
569      * 
570      * (@link SvnInfoUtils#isDirectory(String)) is used to know what files are
571      * directories. Deleted directories are assumed to have been added in (@link
572      * #verifyImplicitActions())
573      */
574     protected void removeDirectories() {
575         final Collection fileBuilders = builder.getFileBuilders().values();
576         final ArrayList toRemove = new ArrayList();
577         for (final Iterator iter = fileBuilders.iterator(); iter.hasNext();) {
578             final FileBuilder fileBuilder = (FileBuilder) iter.next();
579             if (repositoryFileManager.isDirectory(fileBuilder.getName())) {
580                 toRemove.add(fileBuilder.getName());
581             }
582         }
583 
584         for (final Iterator iter = toRemove.iterator(); iter.hasNext();) {
585             builder.getFileBuilders().remove(iter.next());
586         }
587 
588     }
589 
590     /**
591      * Parses the svn log file.
592      * 
593      * @return the SaxParserFactory, so that it can be reused.
594      * @throws IOException
595      *             errors while reading file.
596      * @throws LogSyntaxException
597      *             invalid log syntax.
598      */
599     protected SAXParserFactory parseSvnLog() throws IOException, LogSyntaxException {
600         final long startTime = System.currentTimeMillis();
601         SvnConfigurationOptions.getTaskLogger().log("starting to parse...");
602 
603         final SAXParserFactory factory = SAXParserFactory.newInstance();
604         try {
605             final SAXParser parser = factory.newSAXParser();
606             parser.parse(logFile, new SvnXmlLogFileHandler(builder, repositoryFileManager));
607         } catch (final ParserConfigurationException e) {
608             throw new LogSyntaxException("svn log: " + e.getMessage());
609         } catch (final SAXException e) {
610             throw new LogSyntaxException("svn log: " + e.getMessage());
611         }
612 
613         SvnConfigurationOptions.getTaskLogger().log("parsing svn log finished in " + (System.currentTimeMillis() - startTime) + " ms.");
614         return factory;
615     }
616 
617     private long totalTime = 0;
618 
619     private long groupStart = 0;
620 
621     private int calls = 0;
622 
623     private int requiredDiffCalls = 0;
624 
625     private String cacheFileName;
626 
627     protected class DiffTask implements Runnable {
628         private String fileName;
629         private String newRevision;
630         private String oldRevision;
631         private FileBuilder fileBuilder;
632 
633         protected DiffTask() {
634         }
635 
636         protected DiffTask(final String newRevision) {
637             super();
638             this.newRevision = newRevision;
639         }
640 
641         public DiffTask(final String fileName, final String newRevision, final String oldRevision, final FileBuilder fileBuilder) {
642             super();
643             this.fileName = fileName;
644             this.newRevision = newRevision;
645             this.oldRevision = oldRevision;
646             this.fileBuilder = fileBuilder;
647         }
648 
649         /**
650          * @return the fileName
651          */
652         public String getFileName() {
653             return fileName;
654         }
655 
656         /**
657          * @param fileName
658          *            the fileName to set
659          */
660         public void setFileName(final String fileName) {
661             this.fileName = fileName;
662         }
663 
664         /**
665          * @return the newRevision
666          */
667         public String getNewRevision() {
668             return newRevision;
669         }
670 
671         /**
672          * @param newRevision
673          *            the newRevision to set
674          */
675         public void setNewRevision(final String newRevision) {
676             this.newRevision = newRevision;
677         }
678 
679         /**
680          * @return the oldRevision
681          */
682         public String getOldRevision() {
683             return oldRevision;
684         }
685 
686         /**
687          * @param oldRevision
688          *            the oldRevision to set
689          */
690         public void setOldRevision(final String oldRevision) {
691             this.oldRevision = oldRevision;
692         }
693 
694         public void run() {
695             int[] lineDiff;
696             long end = 0L;
697             try {
698                 // SvnConfigurationOptions.getTaskLogger().log(Thread.currentThread().getName()
699                 // + " Starts... now");
700                 final long start = System.currentTimeMillis();
701                 lineDiff = repositoryFileManager.getLineDiff(oldRevision, newRevision, fileName);
702                 end = System.currentTimeMillis();
703                 synchronized (cacheBuilder) {
704                     totalTime += (end - start);
705                 }
706 
707                 SvnConfigurationOptions.getTaskLogger().info(
708                         "svn diff " + (++calls) + "/" + requiredDiffCalls + ": " + fileName + ", r" + oldRevision + " to r" + newRevision + ", +" + lineDiff[0]
709                                 + " -" + lineDiff[1] + " (" + (end - start) + " ms.) " + Thread.currentThread().getName());
710             } catch (final BinaryDiffException e) {
711                 calls++;
712                 trackBinaryFile();
713                 return;
714             } catch (final IOException e) {
715                 SvnConfigurationOptions.getTaskLogger()
716                         .error("" + (++calls) + "/" + requiredDiffCalls + " IOException: Unable to obtain diff: " + e.toString());
717                 return;
718             }
719 
720             trackFileDiff(lineDiff);
721 
722             performIntermediarySave(end);
723         }
724 
725         protected void trackBinaryFile() {
726             // file is binary and has been deleted
727             cacheBuilder.newRevision(fileName, newRevision, "0", "0", true);
728             fileBuilder.setBinary(true);
729         }
730 
731         protected void trackFileDiff(final int[] lineDiff) {
732             if (lineDiff[0] != -1 && lineDiff[1] != -1) {
733                 builder.updateRevision(fileName, newRevision, lineDiff[0], lineDiff[1]);
734                 cacheBuilder.newRevision(fileName, newRevision, lineDiff[0] + "", lineDiff[1] + "", false);
735             } else {
736                 SvnConfigurationOptions.getTaskLogger().info("unknown behaviour; to be investigated:" + fileName + " r:" + oldRevision + "/r:" + newRevision);
737             }
738         }
739 
740         protected void performIntermediarySave(long end) {
741             synchronized (cacheBuilder) {
742                 if (end - groupStart > INTERMEDIARY_SAVE_INTERVAL_MS) {
743                     final long start = System.currentTimeMillis();
744                     XMLUtil.writeXmlFile(cacheBuilder.getDocument(), cacheFileName);
745                     groupStart = System.currentTimeMillis();
746                     final double estimateLeftInMs = ((double) totalTime / (double) calls * (requiredDiffCalls - calls) / SvnConfigurationOptions
747                             .getNumberSvnDiffThreads());
748                     end = System.currentTimeMillis();
749                     SvnConfigurationOptions.getTaskLogger().info(
750                             System.getProperty("line.separator") + new Date() + " Intermediary save took " + (end - start) + " ms. Estimated completion="
751                                     + new Date(end + (long) estimateLeftInMs) + System.getProperty("line.separator"));
752                 }
753             }
754         }
755 
756         protected FileBuilder getFileBuilder() {
757             return fileBuilder;
758         }
759 
760         protected void setFileBuilder(final FileBuilder fileBuilder) {
761             this.fileBuilder = fileBuilder;
762         }
763 
764     }
765 
766     protected class PerRevDiffTask extends DiffTask {
767         private Map fileBuilders;
768 
769         public PerRevDiffTask(final String newRevision, final Map fileBuilders) {
770             super(newRevision);
771             this.fileBuilders = fileBuilders;
772         }
773 
774         public void run() {
775             int[] lineDiff;
776             Vector results;
777             long end = 0L;
778             try {
779                 // SvnConfigurationOptions.getTaskLogger().log(Thread.currentThread().getName()
780                 // + " Starts... now");
781                 final long start = System.currentTimeMillis();
782                 results = repositoryFileManager.getRevisionDiff(getNewRevision());
783                 end = System.currentTimeMillis();
784                 synchronized (cacheBuilder) {
785                     totalTime += (end - start);
786                 }
787 
788                 SvnConfigurationOptions.getTaskLogger().info(
789                         "svn diff " + (++calls) + "/" + requiredDiffCalls + " on r" + getNewRevision() + " (" + (end - start) + " ms.) "
790                                 + Thread.currentThread().getName());
791 
792                 for (int i = 0; i < results.size(); i++) {
793                     final Object[] element = (Object[]) results.get(i);
794 
795                     if (element.length == SvnDiffUtils.RESULT_SIZE && fileBuilders.containsKey(element[0].toString())) {
796                         setFileName(element[0].toString());
797                         setFileBuilder((FileBuilder) fileBuilders.get(getFileName()));
798                         lineDiff = (int[]) element[1];
799                         setOldRevision("?");
800 
801                         final Boolean isBinary = (Boolean) element[2];
802                         if (isBinary.booleanValue()) {
803                             trackBinaryFile();
804                         }
805 
806                         SvnConfigurationOptions.getTaskLogger().info(
807                                 "\t " + getFileName() + ", on r" + getNewRevision() + ", +" + lineDiff[0] + " -" + lineDiff[1]);
808 
809                         trackFileDiff(lineDiff);
810                     } else {
811                         SvnConfigurationOptions.getTaskLogger().error("Problem with diff " + i + " for revision " + getNewRevision() + ".");
812                     }
813                 }
814 
815             } catch (final BinaryDiffException e) {
816                 // not supposed to happen. tracked individually.
817                 return;
818             } catch (final IOException e) {
819                 SvnConfigurationOptions.getTaskLogger()
820                         .error("" + (++calls) + "/" + requiredDiffCalls + " IOException: Unable to obtain diff: " + e.toString());
821                 return;
822             }
823 
824             performIntermediarySave(end);
825         }
826     }
827 }