Update .gitignore; improve documentation

Add the `files` and `files.*/` patterns to .gitignore to prevent those
directories from being committed again. Update the README with the latest
command-line options, and revise method documentation to match the
implementation.
This commit is contained in:
Ian Molee 2024-04-05 05:31:32 -07:00
parent e11464082b
commit 52373cff45
3 changed files with 13 additions and 8 deletions

2
.gitignore vendored
View File

@ -1,3 +1,5 @@
*.log
output.*.txt
.vscode
files
files.*/

View File

@ -26,14 +26,16 @@ arguments cannot be passed to a Makefile target.
## Options
```
-output string
output file (default is stdout)
-path string
path to the file pool (default "files")
path to the file pool (default "files")
-prefix
use '[doc ###]' prefix for output
use '[doc ###]' prefix for output
-threshold float
similarity threshold (default 0.5)
similarity threshold (default 0.5)
-verbose
enable verbose logging
enable verbose logging
-workers int
number of workers to use (default 2*<number-of-cores>)
number of workers to use (default 2*<number-of-cores>)
```

View File

@ -311,9 +311,10 @@ func (dm *DocumentManager) maybeAssociateFileWithDocument(workItem WorkItem, wor
}
}
// compareFiles computes how much two files overlap, on a scale
// of 0 to 1 by iterating through the files and identifying lines
// that are duplicated.
// compareFiles computes how much two files overlap, on a scale of 0 to 1, by
// iterating through the files and calculating a similarity score based on the
// number of line-level differences between the contents of the two files.
func (dm *DocumentManager) compareFiles(f1Number, f2Number int) (float64, error) {
f1, err := dm.fcc.GetFileContents(f1Number)
if err != nil {