diff --git a/.gitignore b/.gitignore index 29596f3..475cb0d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ *.log output.*.txt .vscode +files +files.*/ diff --git a/README.md b/README.md index af5676a..70341f6 100644 --- a/README.md +++ b/README.md @@ -26,14 +26,16 @@ arguments cannot be passed to a Makefile target. ## Options ``` + -output string + output file (default is stdout) -path string - path to the file pool (default "files") + path to the file pool (default "files") -prefix - use '[doc ###]' prefix for output + use '[doc ###]' prefix for output -threshold float - similarity threshold (default 0.5) + similarity threshold (default 0.5) -verbose - enable verbose logging + enable verbose logging -workers int - number of workers to use (default 2*) + number of workers to use (default 2*) ``` diff --git a/main.go b/main.go index 7196048..cc14ff8 100644 --- a/main.go +++ b/main.go @@ -311,9 +311,10 @@ func (dm *DocumentManager) maybeAssociateFileWithDocument(workItem WorkItem, wor } } -// compareFiles computes how much two files overlap, on a scale -// of 0 to 1 by iterating through the files and identifying lines -// that are duplicated. +// compareFiles computes how much two files overlap on a scale of 0 to 1 by +// iterating through the files and calculating a similarity score that's based +// on the number of line-centric differences between the contents of the two +// files. func (dm *DocumentManager) compareFiles(f1Number, f2Number int) (float64, error) { f1, err := dm.fcc.GetFileContents(f1Number) if err != nil {