From 36a3cb6374f0aeb79ba8b1bd3ab3539f0d66079e Mon Sep 17 00:00:00 2001 From: Carlo Strub Date: Sat, 13 May 2017 22:34:54 +0000 Subject: [PATCH] make gometalinter happier --- classify.go | 57 +++++++++++++++++++++++++++++++++--------------- classify_test.go | 7 +++--- database_test.go | 3 ++- learn.go | 7 +----- learn_test.go | 6 +++-- mail.go | 15 +++++++++++++ 6 files changed, 65 insertions(+), 30 deletions(-) diff --git a/classify.go b/classify.go index 55c018a..cc7812b 100644 --- a/classify.go +++ b/classify.go @@ -37,13 +37,11 @@ func classificationPrior(db *bolt.DB) (g float64, err error) { return g, err } -// classificationLikelihood returns P(W|C_j) -- the probability of seeing a -// particular word W in a document of this class. -func classificationLikelihood(db *bolt.DB, word string) (g, j float64, err error) { +// classificationLikelihoodWordcounts gets wordcounts from database to be used +// in Likelihood calculation +func classificationLikelihoodWordcounts(db *bolt.DB, word string) (gN, jN float64, err error) { err = db.View(func(tx *bolt.Tx) error { - var gN, jN, gTotal, jTotal uint64 - b := tx.Bucket([]byte("Wordlists")) good := b.Bucket([]byte("Good")) @@ -53,7 +51,7 @@ func classificationLikelihood(db *bolt.DB, word string) (g, j float64, err error if err != nil { return err } - gN = gWordHLL.Count() + gN = float64(gWordHLL.Count()) } junk := b.Bucket([]byte("Junk")) jWordRaw := junk.Get([]byte(word)) @@ -62,9 +60,20 @@ func classificationLikelihood(db *bolt.DB, word string) (g, j float64, err error if err != nil { return err } - jN = jWordHLL.Count() + jN = float64(jWordHLL.Count()) } + return nil + }) + + return gN, jN, err +} + +// classificationLikelihoodStatistics gets global statistics from database to +// be used in Likelihood calculation +func classificationLikelihoodStatistics(db *bolt.DB, word string) (gTotal, jTotal float64, err error) { + + err = db.View(func(tx *bolt.Tx) error { p := tx.Bucket([]byte("Statistics")) gRaw := p.Get([]byte("ProcessedGood")) if len(gRaw) > 0 { @@ -72,7 +81,7 @@ func classificationLikelihood(db *bolt.DB, word string) (g, j float64, err error if err != nil { return err } - gTotal = gHLL.Count() + gTotal = float64(gHLL.Count()) } jRaw := p.Get([]byte("ProcessedJunk")) if len(jRaw) > 0 { @@ -80,7 +89,7 @@ func classificationLikelihood(db *bolt.DB, word string) (g, j float64, err error if err != nil { return err } - jTotal = jHLL.Count() + jTotal = float64(jHLL.Count()) } if gTotal == 0 { @@ -90,12 +99,29 @@ func classificationLikelihood(db *bolt.DB, word string) (g, j float64, err error return errors.New("no junk mails have yet been classified") } - g = float64(gN) / float64(gTotal) - j = float64(jN) / float64(jTotal) - return nil }) + return gTotal, jTotal, err +} + +// classificationLikelihood returns P(W|C_j) -- the probability of seeing a +// particular word W in a document of this class. +func classificationLikelihood(db *bolt.DB, word string) (g, j float64, err error) { + + gN, jN, err := classificationLikelihoodWordcounts(db, word) + if err != nil { + return g, j, err + } + + gTotal, jTotal, err := classificationLikelihoodStatistics(db, word) + if err != nil { + return g, j, err + } + + g = gN / gTotal + j = jN / jTotal + return g, j, err } @@ -124,12 +150,7 @@ func classificationWord(db *bolt.DB, word string) (g float64, err error) { // client. func (m *Mail) Classify(db *bolt.DB) error { - err := m.Clean() - if err != nil { - return err - } - - list, err := m.Wordlist() + list, err := m.cleanWordlist() if err != nil { return err } diff --git a/classify_test.go b/classify_test.go index 444007c..2b897a3 100644 --- a/classify_test.go +++ b/classify_test.go @@ -32,7 +32,8 @@ var _ = Describe("Classify", func() { err = m.Load("test/Maildir") Ω(err).ShouldNot(HaveOccurred()) - m.Learn(dbs["test/Maildir"]) + err = m.Learn(dbs["test/Maildir"]) + Ω(err).ShouldNot(HaveOccurred()) // Load good mail m = &Mail{ @@ -42,7 +43,8 @@ var _ = Describe("Classify", func() { err = m.Load("test/Maildir") Ω(err).ShouldNot(HaveOccurred()) - m.Learn(dbs["test/Maildir"]) + err = m.Learn(dbs["test/Maildir"]) + Ω(err).ShouldNot(HaveOccurred()) }) AfterEach(func() { // Cleanup @@ -84,7 +86,6 @@ var _ = Describe("Classify", func() { It("Classify one word from the mail that was learned in good and junk", func() { - m.Learn(dbs["test/Maildir"]) answer, prob, err := Junk(dbs["test/Maildir"], []string{"than"}) Ω(err).ShouldNot(HaveOccurred()) diff --git a/database_test.go b/database_test.go index 68349e0..f978289 100644 --- a/database_test.go +++ b/database_test.go @@ -57,7 +57,7 @@ var _ = Describe("Database", func() { CloseDatabases(dbs) dbTest := dbs["test/Maildir"] - var n = 4 + n := 4 err = dbTest.View(func(tx *bolt.Tx) error { b := tx.Bucket([]byte("Statistics")) @@ -67,6 +67,7 @@ var _ = Describe("Database", func() { return nil }) Ω(err).Should(HaveOccurred()) + Ω(n).Should(Equal(4)) }) }) }) diff --git a/learn.go b/learn.go index e5a4b17..482ea5a 100644 --- a/learn.go +++ b/learn.go @@ -12,12 +12,7 @@ func (m *Mail) Learn(db *bolt.DB) error { log.Println("learn mail " + m.Key) - err := m.Clean() - if err != nil { - return err - } - - list, err := m.Wordlist() + list, err := m.cleanWordlist() if err != nil { return err } diff --git a/learn_test.go b/learn_test.go index 4521a15..49d584f 100644 --- a/learn_test.go +++ b/learn_test.go @@ -51,7 +51,8 @@ var _ = Describe("Learn", func() { It("Load databases for a maildir, then learn a mail and check whether the word counts are correct in the db", func() { - m.Learn(dbs["test/Maildir"]) + err = m.Learn(dbs["test/Maildir"]) + Ω(err).ShouldNot(HaveOccurred()) var jN, sN, gN int @@ -76,7 +77,8 @@ var _ = Describe("Learn", func() { It("Load databases for a maildir, then learn a mail and check whether individual word counts are equal to 1", func() { - m.Learn(dbs["test/Maildir"]) + err = m.Learn(dbs["test/Maildir"]) + Ω(err).ShouldNot(HaveOccurred()) var wordCount uint64 diff --git a/mail.go b/mail.go index 2858b4a..e62f258 100644 --- a/mail.go +++ b/mail.go @@ -222,6 +222,21 @@ func (m *Mail) Wordlist() (w []string, err error) { return w, err } +// cleanWordlist combines Clean and Wordlist in one internal function +func (m *Mail) cleanWordlist() (w []string, err error) { + err = m.Clean() + if err != nil { + return w, err + } + + w, err = m.Wordlist() + if err != nil { + return w, err + } + + return w, err +} + // LoadMails creates missing directories and then loads all mails from a given // slice of Maildirs func LoadMails(d []Maildir) (mails map[Maildir][]*Mail, err error) {