make gometalinter happier

master
Carlo Strub 7 years ago
parent f76bfbd6eb
commit 36a3cb6374

@ -37,13 +37,11 @@ func classificationPrior(db *bolt.DB) (g float64, err error) {
return g, err
}
// classificationLikelihood returns P(W|C_j) -- the probability of seeing a
// particular word W in a document of this class.
func classificationLikelihood(db *bolt.DB, word string) (g, j float64, err error) {
// classificationLikelihoodWordcounts reads the good and junk counts for word
// from the database, for use in the likelihood calculation.
func classificationLikelihoodWordcounts(db *bolt.DB, word string) (gN, jN float64, err error) {
err = db.View(func(tx *bolt.Tx) error {
var gN, jN, gTotal, jTotal uint64
b := tx.Bucket([]byte("Wordlists"))
good := b.Bucket([]byte("Good"))
@ -53,7 +51,7 @@ func classificationLikelihood(db *bolt.DB, word string) (g, j float64, err error
if err != nil {
return err
}
gN = gWordHLL.Count()
gN = float64(gWordHLL.Count())
}
junk := b.Bucket([]byte("Junk"))
jWordRaw := junk.Get([]byte(word))
@ -62,9 +60,20 @@ func classificationLikelihood(db *bolt.DB, word string) (g, j float64, err error
if err != nil {
return err
}
jN = jWordHLL.Count()
jN = float64(jWordHLL.Count())
}
return nil
})
return gN, jN, err
}
// classificationLikelihoodStatistics reads the global good and junk statistics
// from the database, for use in the likelihood calculation.
func classificationLikelihoodStatistics(db *bolt.DB, word string) (gTotal, jTotal float64, err error) {
err = db.View(func(tx *bolt.Tx) error {
p := tx.Bucket([]byte("Statistics"))
gRaw := p.Get([]byte("ProcessedGood"))
if len(gRaw) > 0 {
@ -72,7 +81,7 @@ func classificationLikelihood(db *bolt.DB, word string) (g, j float64, err error
if err != nil {
return err
}
gTotal = gHLL.Count()
gTotal = float64(gHLL.Count())
}
jRaw := p.Get([]byte("ProcessedJunk"))
if len(jRaw) > 0 {
@ -80,7 +89,7 @@ func classificationLikelihood(db *bolt.DB, word string) (g, j float64, err error
if err != nil {
return err
}
jTotal = jHLL.Count()
jTotal = float64(jHLL.Count())
}
if gTotal == 0 {
@ -90,12 +99,29 @@ func classificationLikelihood(db *bolt.DB, word string) (g, j float64, err error
return errors.New("no junk mails have yet been classified")
}
g = float64(gN) / float64(gTotal)
j = float64(jN) / float64(jTotal)
return nil
})
return gTotal, jTotal, err
}
// classificationLikelihood computes P(W|C_j) for a single word W: for both the
// good class (g) and the junk class (j) it divides the word's count by the
// corresponding global statistic.
//
// The counts are read via classificationLikelihoodWordcounts and the global
// statistics via classificationLikelihoodStatistics. If either lookup fails,
// both likelihoods are returned as zero together with that error.
func classificationLikelihood(db *bolt.DB, word string) (g, j float64, err error) {
	var goodCount, junkCount float64
	if goodCount, junkCount, err = classificationLikelihoodWordcounts(db, word); err != nil {
		return 0, 0, err
	}

	var goodTotal, junkTotal float64
	if goodTotal, junkTotal, err = classificationLikelihoodStatistics(db, word); err != nil {
		return 0, 0, err
	}

	return goodCount / goodTotal, junkCount / junkTotal, nil
}
@ -124,12 +150,7 @@ func classificationWord(db *bolt.DB, word string) (g float64, err error) {
// client.
func (m *Mail) Classify(db *bolt.DB) error {
err := m.Clean()
if err != nil {
return err
}
list, err := m.Wordlist()
list, err := m.cleanWordlist()
if err != nil {
return err
}

@ -32,7 +32,8 @@ var _ = Describe("Classify", func() {
err = m.Load("test/Maildir")
Ω(err).ShouldNot(HaveOccurred())
m.Learn(dbs["test/Maildir"])
err = m.Learn(dbs["test/Maildir"])
Ω(err).ShouldNot(HaveOccurred())
// Load good mail
m = &Mail{
@ -42,7 +43,8 @@ var _ = Describe("Classify", func() {
err = m.Load("test/Maildir")
Ω(err).ShouldNot(HaveOccurred())
m.Learn(dbs["test/Maildir"])
err = m.Learn(dbs["test/Maildir"])
Ω(err).ShouldNot(HaveOccurred())
})
AfterEach(func() {
// Cleanup
@ -84,7 +86,6 @@ var _ = Describe("Classify", func() {
It("Classify one word from the mail that was learned in good and junk", func() {
m.Learn(dbs["test/Maildir"])
answer, prob, err := Junk(dbs["test/Maildir"], []string{"than"})
Ω(err).ShouldNot(HaveOccurred())

@ -57,7 +57,7 @@ var _ = Describe("Database", func() {
CloseDatabases(dbs)
dbTest := dbs["test/Maildir"]
var n = 4
n := 4
err = dbTest.View(func(tx *bolt.Tx) error {
b := tx.Bucket([]byte("Statistics"))
@ -67,6 +67,7 @@ var _ = Describe("Database", func() {
return nil
})
Ω(err).Should(HaveOccurred())
Ω(n).Should(Equal(4))
})
})
})

@ -12,12 +12,7 @@ func (m *Mail) Learn(db *bolt.DB) error {
log.Println("learn mail " + m.Key)
err := m.Clean()
if err != nil {
return err
}
list, err := m.Wordlist()
list, err := m.cleanWordlist()
if err != nil {
return err
}

@ -51,7 +51,8 @@ var _ = Describe("Learn", func() {
It("Load databases for a maildir, then learn a mail and check whether the word counts are correct in the db", func() {
m.Learn(dbs["test/Maildir"])
err = m.Learn(dbs["test/Maildir"])
Ω(err).ShouldNot(HaveOccurred())
var jN, sN, gN int
@ -76,7 +77,8 @@ var _ = Describe("Learn", func() {
It("Load databases for a maildir, then learn a mail and check whether individual word counts are equal to 1", func() {
m.Learn(dbs["test/Maildir"])
err = m.Learn(dbs["test/Maildir"])
Ω(err).ShouldNot(HaveOccurred())
var wordCount uint64

@ -222,6 +222,21 @@ func (m *Mail) Wordlist() (w []string, err error) {
return w, err
}
// cleanWordlist is an internal helper that runs Clean on the mail and, when
// that succeeds, hands back whatever Wordlist returns. A failure in Clean is
// returned immediately with a nil word list.
func (m *Mail) cleanWordlist() (w []string, err error) {
	if err = m.Clean(); err != nil {
		return nil, err
	}
	return m.Wordlist()
}
// LoadMails creates missing directories and then loads all mails from a given
// slice of Maildirs
func LoadMails(d []Maildir) (mails map[Maildir][]*Mail, err error) {

Loading…
Cancel
Save