From 2b6fd20af5c7470810dae544cbae7912505a5b22 Mon Sep 17 00:00:00 2001
From: Tyrone Warren <ty@nextvr.com>
Date: Tue, 12 Sep 2017 13:51:09 -0700
Subject: [PATCH 1/3] Accept map[string]interface{} in New and return the attached
 values from ClosestN: the string key is searched, the interface{} value is returned

---
 closestmatch.go      | 45 ++++++++++++++--------
 closestmatch_test.go | 91 +++++++++++++++++++++++++++-----------------
 test/data.go         | 35 ++++++++++++++---
 3 files changed, 115 insertions(+), 56 deletions(-)

diff --git a/closestmatch.go b/closestmatch.go
index 1ea7a6a..fa21f99 100755
--- a/closestmatch.go
+++ b/closestmatch.go
@@ -22,23 +22,26 @@ type ClosestMatch struct {
 type IDInfo struct {
 	Key           string
 	NumSubstrings int
+	Data          interface{}
 }
 
 // New returns a new structure for performing closest matches
-func New(possible []string, subsetSize []int) *ClosestMatch {
+func New(possible map[string]interface{}, subsetSize []int) *ClosestMatch {
 	cm := new(ClosestMatch)
 	cm.SubstringSizes = subsetSize
 	cm.SubstringToID = make(map[string]map[uint32]struct{})
 	cm.ID = make(map[uint32]IDInfo)
-	for i, s := range possible {
-		substrings := cm.splitWord(strings.ToLower(s))
-		cm.ID[uint32(i)] = IDInfo{Key: s, NumSubstrings: len(substrings)}
+	i := 0
+	for k, m := range possible {
+		substrings := cm.splitWord(strings.ToLower(k))
+		cm.ID[uint32(i)] = IDInfo{Key: k, NumSubstrings: len(substrings), Data: m}
 		for substring := range substrings {
 			if _, ok := cm.SubstringToID[substring]; !ok {
 				cm.SubstringToID[substring] = make(map[uint32]struct{})
 			}
 			cm.SubstringToID[substring][uint32(i)] = struct{}{}
 		}
+		i++
 	}
 
 	return cm
@@ -77,16 +80,23 @@ func (cm *ClosestMatch) Save(filename string) error {
 	return enc.Encode(cm)
 }
 
+type workerResult struct {
+	Value int
+	Data  interface{}
+}
+
 func (cm *ClosestMatch) worker(id int, jobs <-chan job, results chan<- result) {
 	for j := range jobs {
-		m := make(map[string]int)
+		m := make(map[string]workerResult)
 		if ids, ok := cm.SubstringToID[j.substring]; ok {
 			weight := 1000 / len(ids)
 			for id := range ids {
 				if _, ok2 := m[cm.ID[id].Key]; !ok2 {
-					m[cm.ID[id].Key] = 0
+					m[cm.ID[id].Key] = workerResult{Value: 0, Data: cm.ID[id].Data}
 				}
-				m[cm.ID[id].Key] += 1 + 1000/len(cm.ID[id].Key) + weight
+				item := m[cm.ID[id].Key]
+				item.Value += 1 + 1000/len(cm.ID[id].Key) + weight
+				m[cm.ID[id].Key] = item
 			}
 		}
 		results <- result{m: m}
@@ -98,10 +108,10 @@ type job struct {
 }
 
 type result struct {
-	m map[string]int
+	m map[string]workerResult
 }
 
-func (cm *ClosestMatch) match(searchWord string) map[string]int {
+func (cm *ClosestMatch) match(searchWord string) map[string]workerResult {
 	searchSubstrings := cm.splitWord(searchWord)
 	searchSubstringsLen := len(searchSubstrings)
 
@@ -118,12 +128,14 @@ func (cm *ClosestMatch) match(searchWord string) map[string]int {
 	}
 	close(jobs)
 
-	m := make(map[string]int)
+	m := make(map[string]workerResult)
 	for a := 1; a <= searchSubstringsLen; a++ {
 		r := <-results
 		for key := range r.m {
 			if _, ok := m[key]; ok {
-				m[key] += r.m[key]
+				x := m[key]
+				x.Value += r.m[key].Value
+				m[key] = x
 			} else {
 				m[key] = r.m[key]
 			}
@@ -142,22 +154,22 @@ func (cm *ClosestMatch) Closest(searchWord string) string {
 }
 
 // ClosestN searches for the `searchWord` and returns the n closests matches
-func (cm *ClosestMatch) ClosestN(searchWord string, max int) []string {
-	matches := make([]string, 0, max)
+func (cm *ClosestMatch) ClosestN(searchWord string, max int) []interface{} {
+	matches := make([]interface{}, 0, max)
 	for i, pair := range rankByWordCount(cm.match(searchWord)) {
 		if i >= max {
 			break
 		}
-		matches = append(matches, pair.Key)
+		matches = append(matches, pair.Data)
 	}
 	return matches
 }
 
-func rankByWordCount(wordFrequencies map[string]int) PairList {
+func rankByWordCount(wordFrequencies map[string]workerResult) PairList {
 	pl := make(PairList, len(wordFrequencies))
 	i := 0
 	for k, v := range wordFrequencies {
-		pl[i] = Pair{k, v}
+		pl[i] = Pair{k, v.Value, v.Data}
 		i++
 	}
 	sort.Sort(sort.Reverse(pl))
@@ -167,6 +179,7 @@ func rankByWordCount(wordFrequencies map[string]int) PairList {
 type Pair struct {
 	Key   string
 	Value int
+	Data  interface{}
 }
 
 type PairList []Pair
diff --git a/closestmatch_test.go b/closestmatch_test.go
index 1a19789..8f3316d 100755
--- a/closestmatch_test.go
+++ b/closestmatch_test.go
@@ -6,17 +6,17 @@ import (
 	"strings"
 	"testing"
 
-	"github.com/schollz/closestmatch/test"
+	"github.com/Yugloocamai/closestmatch/test"
 )
 
 func BenchmarkNew(b *testing.B) {
 	for i := 0; i < b.N; i++ {
-		New(test.WordsToTest, []int{3})
+		New(test.BooksToTest, []int{3})
 	}
 }
 
 func BenchmarkSplitOne(b *testing.B) {
-	cm := New(test.WordsToTest, []int{3})
+	cm := New(test.BooksToTest, []int{3})
 	searchWord := test.SearchWords[0]
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
@@ -26,8 +26,8 @@ func BenchmarkSplitOne(b *testing.B) {
 
 func BenchmarkClosestOne(b *testing.B) {
 	bText, _ := ioutil.ReadFile("test/books.list")
-	wordsToTest := strings.Split(strings.ToLower(string(bText)), "\n")
-	cm := New(wordsToTest, []int{3})
+	books := test.GetBooks(string(bText))
+	cm := New(books, []int{3})
 	searchWord := test.SearchWords[0]
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
@@ -37,8 +37,8 @@ func BenchmarkClosestOne(b *testing.B) {
 
 func BenchmarkClosest3(b *testing.B) {
 	bText, _ := ioutil.ReadFile("test/books.list")
-	wordsToTest := strings.Split(strings.ToLower(string(bText)), "\n")
-	cm := New(wordsToTest, []int{3})
+	books := test.GetBooks(string(bText))
+	cm := New(books, []int{3})
 	searchWord := test.SearchWords[0]
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
@@ -48,8 +48,8 @@ func BenchmarkClosest3(b *testing.B) {
 
 func BenchmarkClosest30(b *testing.B) {
 	bText, _ := ioutil.ReadFile("test/books.list")
-	wordsToTest := strings.Split(strings.ToLower(string(bText)), "\n")
-	cm := New(wordsToTest, []int{3})
+	books := test.GetBooks(string(bText))
+	cm := New(books, []int{3})
 	searchWord := test.SearchWords[0]
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
@@ -59,8 +59,8 @@ func BenchmarkClosest30(b *testing.B) {
 
 func BenchmarkFileLoad(b *testing.B) {
 	bText, _ := ioutil.ReadFile("test/books.list")
-	wordsToTest := strings.Split(strings.ToLower(string(bText)), "\n")
-	cm := New(wordsToTest, []int{3, 4})
+	books := test.GetBooks(string(bText))
+	cm := New(books, []int{3, 4})
 	cm.Save("test/books.list.cm.gz")
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
@@ -70,8 +70,8 @@ func BenchmarkFileLoad(b *testing.B) {
 
 func BenchmarkFileSave(b *testing.B) {
 	bText, _ := ioutil.ReadFile("test/books.list")
-	wordsToTest := strings.Split(strings.ToLower(string(bText)), "\n")
-	cm := New(wordsToTest, []int{3, 4})
+	books := test.GetBooks(string(bText))
+	cm := New(books, []int{3, 4})
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
 		cm.Save("test/books.list.cm.gz")
@@ -79,7 +79,13 @@ func BenchmarkFileSave(b *testing.B) {
 }
 
 func ExampleMatchingSmall() {
-	cm := New([]string{"love", "loving", "cat", "kit", "cats"}, []int{4})
+	loveCats := make(map[string]interface{})
+	loveCats["love"] = map[string]string{"name": "love"}
+	loveCats["loving"] = map[string]string{"name": "loving"}
+	loveCats["cat"] = map[string]string{"name": "cat"}
+	loveCats["kit"] = map[string]string{"name": "kit"}
+	loveCats["cats"] = map[string]string{"name": "cats"}
+	cm := New(loveCats, []int{4})
 	fmt.Println(cm.splitWord("love"))
 	fmt.Println(cm.splitWord("kit"))
 	fmt.Println(cm.Closest("kit"))
@@ -91,7 +97,13 @@ func ExampleMatchingSmall() {
 }
 
 func ExampleMatchingSimple() {
-	cm := New(test.WordsToTest, []int{3})
+
+	booksLines := strings.Split(strings.ToLower(test.Books), "\n")
+	wordsToTest := make(map[string]interface{})
+	for _, v := range booksLines {
+		wordsToTest[v] = map[string]string{"words": v}
+	}
+	cm := New(wordsToTest, []int{3})
 	for _, searchWord := range test.SearchWords {
 		fmt.Printf("'%s' matched '%s'\n", searchWord, cm.Closest(searchWord))
 	}
@@ -100,21 +112,26 @@ func ExampleMatchingSimple() {
 	// 'mysterious afur at styles by christie' matched 'the mysterious affair at styles by agatha christie'
 	// 'hard times by charles dickens' matched 'hard times by charles dickens'
 	// 'complete william shakespeare' matched 'the complete works of william shakespeare by william shakespeare'
-	// 'war by hg wells' matched 'the war of the worlds by h. g. wells'
+	// 'War by HG Wells' matched 'the war of the worlds by h. g. wells'
 
 }
 
 func ExampleMatchingN() {
-	cm := New(test.WordsToTest, []int{4})
-	fmt.Println(cm.ClosestN("war h.g. wells", 3))
+	cm := New(test.BooksToTest, []int{4})
+	results := cm.ClosestN("war h.g. wells", 3)
+	var slice []string
+	for _, v := range results {
+		slice = append(slice, v.(map[string]string)["name"])
+	}
+	fmt.Println(slice)
 	// Output:
 	// [the war of the worlds by h. g. wells the time machine by h. g. wells war and peace by graf leo tolstoy]
 }
 
 func ExampleMatchingBigList() {
 	bText, _ := ioutil.ReadFile("test/books.list")
-	wordsToTest := strings.Split(strings.ToLower(string(bText)), "\n")
-	cm := New(wordsToTest, []int{3})
+	books := test.GetBooks(string(bText))
+	cm := New(books, []int{3})
 	searchWord := "island of a thod mirrors"
 	fmt.Println(cm.Closest(searchWord))
 	// Output:
@@ -123,12 +140,12 @@ func ExampleMatchingBigList() {
 
 func ExampleMatchingCatcher() {
 	bText, _ := ioutil.ReadFile("test/catcher.txt")
-	wordsToTest := strings.Split(strings.ToLower(string(bText)), "\n")
-	cm := New(wordsToTest, []int{5})
+	books := test.GetBooks(string(bText))
+	cm := New(books, []int{5})
 	searchWord := "catcher in the rye by jd salinger"
 	for i, match := range cm.ClosestN(searchWord, 3) {
 		if i == 2 {
-			fmt.Println(match)
+			fmt.Println(match.(map[string]string)["name"])
 		}
 	}
 	// Output:
@@ -137,12 +154,12 @@ func ExampleMatchingCatcher() {
 
 func ExampleMatchingPotter() {
 	bText, _ := ioutil.ReadFile("test/potter.txt")
-	wordsToTest := strings.Split(strings.ToLower(string(bText)), "\n")
-	cm := New(wordsToTest, []int{5})
+	books := test.GetBooks(string(bText))
+	cm := New(books, []int{5})
 	searchWord := "harry potter and the half blood prince by j.k. rowling"
 	for i, match := range cm.ClosestN(searchWord, 3) {
 		if i == 1 {
-			fmt.Println(match)
+			fmt.Println(match.(map[string]string)["name"])
 		}
 	}
 	// Output:
@@ -151,23 +168,27 @@ func ExampleMatchingPotter() {
 
 func TestAccuracyBookWords(t *testing.T) {
 	bText, _ := ioutil.ReadFile("test/books.list")
-	wordsToTest := strings.Split(strings.ToLower(string(bText)), "\n")
-	cm := New(wordsToTest, []int{4, 5})
+	books := test.GetBooks(string(bText))
+	cm := New(books, []int{4, 5})
 	accuracy := cm.AccuracyMutatingWords()
 	fmt.Printf("Accuracy with mutating words in book list:\t%2.1f%%\n", accuracy)
 }
 
 func TestAccuracyBookLetters(t *testing.T) {
 	bText, _ := ioutil.ReadFile("test/books.list")
-	wordsToTest := strings.Split(strings.ToLower(string(bText)), "\n")
-	cm := New(wordsToTest, []int{5})
+	books := test.GetBooks(string(bText))
+	cm := New(books, []int{5})
 	accuracy := cm.AccuracyMutatingLetters()
 	fmt.Printf("Accuracy with mutating letters in book list:\t%2.1f%%\n", accuracy)
 }
 
 func TestAccuracyDictionaryLetters(t *testing.T) {
 	bText, _ := ioutil.ReadFile("test/popular.txt")
-	wordsToTest := strings.Split(strings.ToLower(string(bText)), "\n")
+	words := strings.Split(strings.ToLower(string(bText)), "\n")
+	wordsToTest := make(map[string]interface{})
+	for _, v := range words {
+		wordsToTest[v] = map[string]string{"word": v}
+	}
 	cm := New(wordsToTest, []int{2, 3, 4})
 	accuracy := cm.AccuracyMutatingWords()
 	fmt.Printf("Accuracy with mutating letters in dictionary:\t%2.1f%%\n", accuracy)
@@ -175,12 +196,12 @@ func TestAccuracyDictionaryLetters(t *testing.T) {
 
 func TestSaveLoad(t *testing.T) {
 	bText, _ := ioutil.ReadFile("test/books.list")
-	wordsToTest := strings.Split(strings.ToLower(string(bText)), "\n")
+	books := test.GetBooks(string(bText))
 	type TestStruct struct {
 		cm *ClosestMatch
 	}
 	tst := new(TestStruct)
-	tst.cm = New(wordsToTest, []int{5})
+	tst.cm = New(books, []int{5})
 	err := tst.cm.Save("test.gob")
 	if err != nil {
 		t.Error(err)
@@ -191,8 +212,8 @@ func TestSaveLoad(t *testing.T) {
 	if err != nil {
 		t.Error(err)
 	}
-	answer2 := tst2.cm.Closest("war of the worlds by hg wells")
-	answer1 := tst.cm.Closest("war of the worlds by hg wells")
+	answer2 := tst2.cm.Closest("war of the worlds")
+	answer1 := tst.cm.Closest("war of the worlds")
 	if answer1 != answer2 {
 		t.Errorf("Differing answers: '%s' '%s'", answer1, answer2)
 	}
diff --git a/test/data.go b/test/data.go
index ed4ebb0..eef3386 100644
--- a/test/data.go
+++ b/test/data.go
@@ -4,7 +4,7 @@ import (
 	"strings"
 )
 
-var books = `Pride and Prejudice by Jane Austen
+var Books = `Pride and Prejudice by Jane Austen
 Alice's Adventures in Wonderland by Lewis Carroll
 The Importance of Being Earnest: A Trivial Comedy for Serious People by Oscar Wilde
 A Tale of Two Cities by Charles Dickens
@@ -109,12 +109,37 @@ Red Riding Hood by Sarah Blakley-Cartwright
 The Kingdom of This World by Alejo Carpentier
 Hitty, Her First Hundred Years by Rachel Field`
 
-var WordsToTest []string
+var WordsToTest map[string]interface{}
+var BooksToTest map[string]interface{}
+
 var SearchWords = []string{"cervantes don quixote", "mysterious afur at styles by christie", "hard times by charles dickens", "complete william shakespeare", "War by HG Wells"}
 
 func init() {
-	WordsToTest = strings.Split(strings.ToLower(books), "\n")
-	for i := range SearchWords {
-		SearchWords[i] = strings.ToLower(SearchWords[i])
+
+	WordsToTest = make(map[string]interface{})
+	for _, v := range SearchWords {
+		WordsToTest[v] = map[string]string{"words": v}
+	}
+	// for i := range SearchWords {
+	// 	SearchWords[i] = strings.ToLower(SearchWords[i])
+	// }
+
+	BooksToTest = GetBooks(Books)
+	// for i := range SearchWords {
+	// 	SearchWords[i] = strings.ToLower(SearchWords[i])
+	// }
+}
+
+func GetBooks(text string) map[string]interface{} {
+	booksLines := strings.Split(strings.ToLower(text), "\n")
+	books := make(map[string]interface{})
+	for _, v := range booksLines {
+		pair := strings.Split(v, " by ")
+		author := "unknown"
+		if len(pair) == 2 {
+			author = pair[1]
+		}
+		books[v] = map[string]string{"author": author, "name": v}
 	}
+	return books
 }

From 502175c3c48bae3b4381491be0b2bfa1dbfa9916 Mon Sep 17 00:00:00 2001
From: Yugloocamai <Yugloocamai@gmail.com>
Date: Thu, 14 Sep 2017 15:14:54 -0700
Subject: [PATCH 2/3] Update README.md

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 6d9e132..36404a9 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,6 @@
 
+#THIS IS A MODIFIED VERSION OF CLOSESTMATCH!  DOCUMENTATION WILL BE INACCURATE UNTIL I UPDATE.  This fork allows an interface to be attached to each searchable item so we can return anything we want.
+
 # closestmatch :page_with_curl:
 
 <a href="#"><img src="https://img.shields.io/badge/version-2.1.0-brightgreen.svg?style=flat-square" alt="Version"></a>

From 090dcfb9e28396c6d6e72b1cfc8e7cbb1ee3be9e Mon Sep 17 00:00:00 2001
From: Yugloocamai <Yugloocamai@gmail.com>
Date: Thu, 14 Sep 2017 15:15:23 -0700
Subject: [PATCH 3/3] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 36404a9..3398e4f 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 
-#THIS IS A MODIFIED VERSION OF CLOSESTMATCH!  DOCUMENTATION WILL BE INACCURATE UNTIL I UPDATE.  This fork allows an interface to be attached to each searchable item so we can return anything we want.
+# THIS IS A MODIFIED VERSION OF CLOSESTMATCH! DOCUMENTATION WILL BE INACCURATE UNTIL I UPDATE. This fork allows an arbitrary value (`interface{}`) to be attached to each searchable item, so a match can return any data we want.
 
 # closestmatch :page_with_curl: