make gometalinter happier

master
Carlo Strub 7 years ago
parent f76bfbd6eb
commit 36a3cb6374

@ -37,13 +37,11 @@ func classificationPrior(db *bolt.DB) (g float64, err error) {
return g, err return g, err
} }
// classificationLikelihood returns P(W|C_j) -- the probability of seeing a // classificationLikelihoodWordcounts gets wordcounts from database to be used
// particular word W in a document of this class. // in Likelihood calculation
func classificationLikelihood(db *bolt.DB, word string) (g, j float64, err error) { func classificationLikelihoodWordcounts(db *bolt.DB, word string) (gN, jN float64, err error) {
err = db.View(func(tx *bolt.Tx) error { err = db.View(func(tx *bolt.Tx) error {
var gN, jN, gTotal, jTotal uint64
b := tx.Bucket([]byte("Wordlists")) b := tx.Bucket([]byte("Wordlists"))
good := b.Bucket([]byte("Good")) good := b.Bucket([]byte("Good"))
@ -53,7 +51,7 @@ func classificationLikelihood(db *bolt.DB, word string) (g, j float64, err error
if err != nil { if err != nil {
return err return err
} }
gN = gWordHLL.Count() gN = float64(gWordHLL.Count())
} }
junk := b.Bucket([]byte("Junk")) junk := b.Bucket([]byte("Junk"))
jWordRaw := junk.Get([]byte(word)) jWordRaw := junk.Get([]byte(word))
@ -62,9 +60,20 @@ func classificationLikelihood(db *bolt.DB, word string) (g, j float64, err error
if err != nil { if err != nil {
return err return err
} }
jN = jWordHLL.Count() jN = float64(jWordHLL.Count())
} }
return nil
})
return gN, jN, err
}
// classificationLikelihoodStatistics gets global statistics from database to
// be used in Likelihood calculation
func classificationLikelihoodStatistics(db *bolt.DB, word string) (gTotal, jTotal float64, err error) {
err = db.View(func(tx *bolt.Tx) error {
p := tx.Bucket([]byte("Statistics")) p := tx.Bucket([]byte("Statistics"))
gRaw := p.Get([]byte("ProcessedGood")) gRaw := p.Get([]byte("ProcessedGood"))
if len(gRaw) > 0 { if len(gRaw) > 0 {
@ -72,7 +81,7 @@ func classificationLikelihood(db *bolt.DB, word string) (g, j float64, err error
if err != nil { if err != nil {
return err return err
} }
gTotal = gHLL.Count() gTotal = float64(gHLL.Count())
} }
jRaw := p.Get([]byte("ProcessedJunk")) jRaw := p.Get([]byte("ProcessedJunk"))
if len(jRaw) > 0 { if len(jRaw) > 0 {
@ -80,7 +89,7 @@ func classificationLikelihood(db *bolt.DB, word string) (g, j float64, err error
if err != nil { if err != nil {
return err return err
} }
jTotal = jHLL.Count() jTotal = float64(jHLL.Count())
} }
if gTotal == 0 { if gTotal == 0 {
@ -90,12 +99,29 @@ func classificationLikelihood(db *bolt.DB, word string) (g, j float64, err error
return errors.New("no junk mails have yet been classified") return errors.New("no junk mails have yet been classified")
} }
g = float64(gN) / float64(gTotal)
j = float64(jN) / float64(jTotal)
return nil return nil
}) })
return gTotal, jTotal, err
}
// classificationLikelihood returns P(W|C_j) -- the probability of seeing a
// particular word W in a document of this class.
func classificationLikelihood(db *bolt.DB, word string) (g, j float64, err error) {
gN, jN, err := classificationLikelihoodWordcounts(db, word)
if err != nil {
return g, j, err
}
gTotal, jTotal, err := classificationLikelihoodStatistics(db, word)
if err != nil {
return g, j, err
}
g = gN / gTotal
j = jN / jTotal
return g, j, err return g, j, err
} }
@ -124,12 +150,7 @@ func classificationWord(db *bolt.DB, word string) (g float64, err error) {
// client. // client.
func (m *Mail) Classify(db *bolt.DB) error { func (m *Mail) Classify(db *bolt.DB) error {
err := m.Clean() list, err := m.cleanWordlist()
if err != nil {
return err
}
list, err := m.Wordlist()
if err != nil { if err != nil {
return err return err
} }

@ -32,7 +32,8 @@ var _ = Describe("Classify", func() {
err = m.Load("test/Maildir") err = m.Load("test/Maildir")
Ω(err).ShouldNot(HaveOccurred()) Ω(err).ShouldNot(HaveOccurred())
m.Learn(dbs["test/Maildir"]) err = m.Learn(dbs["test/Maildir"])
Ω(err).ShouldNot(HaveOccurred())
// Load good mail // Load good mail
m = &Mail{ m = &Mail{
@ -42,7 +43,8 @@ var _ = Describe("Classify", func() {
err = m.Load("test/Maildir") err = m.Load("test/Maildir")
Ω(err).ShouldNot(HaveOccurred()) Ω(err).ShouldNot(HaveOccurred())
m.Learn(dbs["test/Maildir"]) err = m.Learn(dbs["test/Maildir"])
Ω(err).ShouldNot(HaveOccurred())
}) })
AfterEach(func() { AfterEach(func() {
// Cleanup // Cleanup
@ -84,7 +86,6 @@ var _ = Describe("Classify", func() {
It("Classify one word from the mail that was learned in good and junk", func() { It("Classify one word from the mail that was learned in good and junk", func() {
m.Learn(dbs["test/Maildir"])
answer, prob, err := Junk(dbs["test/Maildir"], []string{"than"}) answer, prob, err := Junk(dbs["test/Maildir"], []string{"than"})
Ω(err).ShouldNot(HaveOccurred()) Ω(err).ShouldNot(HaveOccurred())

@ -57,7 +57,7 @@ var _ = Describe("Database", func() {
CloseDatabases(dbs) CloseDatabases(dbs)
dbTest := dbs["test/Maildir"] dbTest := dbs["test/Maildir"]
var n = 4 n := 4
err = dbTest.View(func(tx *bolt.Tx) error { err = dbTest.View(func(tx *bolt.Tx) error {
b := tx.Bucket([]byte("Statistics")) b := tx.Bucket([]byte("Statistics"))
@ -67,6 +67,7 @@ var _ = Describe("Database", func() {
return nil return nil
}) })
Ω(err).Should(HaveOccurred()) Ω(err).Should(HaveOccurred())
Ω(n).Should(Equal(4))
}) })
}) })
}) })

@ -12,12 +12,7 @@ func (m *Mail) Learn(db *bolt.DB) error {
log.Println("learn mail " + m.Key) log.Println("learn mail " + m.Key)
err := m.Clean() list, err := m.cleanWordlist()
if err != nil {
return err
}
list, err := m.Wordlist()
if err != nil { if err != nil {
return err return err
} }

@ -51,7 +51,8 @@ var _ = Describe("Learn", func() {
It("Load databases for a maildir, then learn a mail and check whether the word counts are correct in the db", func() { It("Load databases for a maildir, then learn a mail and check whether the word counts are correct in the db", func() {
m.Learn(dbs["test/Maildir"]) err = m.Learn(dbs["test/Maildir"])
Ω(err).ShouldNot(HaveOccurred())
var jN, sN, gN int var jN, sN, gN int
@ -76,7 +77,8 @@ var _ = Describe("Learn", func() {
It("Load databases for a maildir, then learn a mail and check whether individual word counts are equal to 1", func() { It("Load databases for a maildir, then learn a mail and check whether individual word counts are equal to 1", func() {
m.Learn(dbs["test/Maildir"]) err = m.Learn(dbs["test/Maildir"])
Ω(err).ShouldNot(HaveOccurred())
var wordCount uint64 var wordCount uint64

@ -222,6 +222,21 @@ func (m *Mail) Wordlist() (w []string, err error) {
return w, err return w, err
} }
// cleanWordlist combines Clean and Wordlist in one internal function
func (m *Mail) cleanWordlist() (w []string, err error) {
err = m.Clean()
if err != nil {
return w, err
}
w, err = m.Wordlist()
if err != nil {
return w, err
}
return w, err
}
// LoadMails creates missing directories and then loads all mails from a given // LoadMails creates missing directories and then loads all mails from a given
// slice of Maildirs // slice of Maildirs
func LoadMails(d []Maildir) (mails map[Maildir][]*Mail, err error) { func LoadMails(d []Maildir) (mails map[Maildir][]*Mail, err error) {

Loading…
Cancel
Save