Update .gitignore; improve documentation

Add the `files` directories to .gitignore to prevent them from being
committed again. Update the README with the latest command-line options,
and revise method documentation to match the implementation.
This commit is contained in:
Ian Molee 2024-04-05 05:31:32 -07:00
parent e11464082b
commit 52373cff45
3 changed files with 13 additions and 8 deletions

2
.gitignore vendored
View File

@ -1,3 +1,5 @@
*.log *.log
output.*.txt output.*.txt
.vscode .vscode
files
files.*/

View File

@ -26,14 +26,16 @@ arguments cannot be passed to a Makefile target.
## Options ## Options
``` ```
-output string
output file (default is stdout)
-path string -path string
path to the file pool (default "files") path to the file pool (default "files")
-prefix -prefix
use '[doc ###]' prefix for output use '[doc ###]' prefix for output
-threshold float -threshold float
similarity threshold (default 0.5) similarity threshold (default 0.5)
-verbose -verbose
enable verbose logging enable verbose logging
-workers int -workers int
number of workers to use (default 2*<number-of-cores>) number of workers to use (default 2*<number-of-cores>)
``` ```

View File

@ -311,9 +311,10 @@ func (dm *DocumentManager) maybeAssociateFileWithDocument(workItem WorkItem, wor
} }
} }
// compareFiles computes how much two files overlap, on a scale // compareFiles computes how much two files overlap on a scale of 0 to 1 by
// of 0 to 1 by iterating through the files and identifying lines // iterating through the files and calculating a similarity score that's based
// that are duplicated. // on the number of line-centric differences between the contents of the two
// files.
func (dm *DocumentManager) compareFiles(f1Number, f2Number int) (float64, error) { func (dm *DocumentManager) compareFiles(f1Number, f2Number int) (float64, error) {
f1, err := dm.fcc.GetFileContents(f1Number) f1, err := dm.fcc.GetFileContents(f1Number)
if err != nil { if err != nil {