diff --git a/main.go b/main.go index 01e6413..287307c 100644 --- a/main.go +++ b/main.go @@ -285,23 +285,20 @@ func (dm *DocumentManager) compareFiles(f1Number, f2Number int) (float64, error) } histogram := make(map[string]int) - for _, lines := range [][]string{f1, f2} { - for _, line := range lines { - // Skip blank lines, which can throw off the count. - if line == "" { - continue - } + for _, line := range f1 { histogram[line]++ } + for _, line := range f2 { + histogram[line]-- } - var overlap int + var differences float64 for _, v := range histogram { - if v == 2 { - overlap++ - } + differences += math.Abs(float64(v)) } - return float64(overlap) / float64(len(histogram)), nil + + similarity := 1 - (differences / float64(len(f1)+len(f2))) + return similarity, nil } // Document stores a document ID and a list of associated files.