Update dependencies with glide up. Load mails correctly before learning or classifying them.

improve some logging messages
master
Carlo Strub 7 years ago
parent 3c665ed446
commit 87b51ebee0

@ -1,7 +1,6 @@
package sisyphus
import (
"errors"
"os"
log "github.com/sirupsen/logrus"
@ -79,13 +78,16 @@ func classificationStatistics(db *bolt.DB) (gTotal, jTotal float64, err error) {
}
if gTotal == 0 && jTotal == 0 {
return errors.New("no mails have yet been learned")
log.Warning("no mails have yet been learned")
return nil
}
if gTotal == 0 {
return errors.New("no good mails have yet been learned")
log.Warning("no good mails have yet been learned")
return nil
}
if jTotal == 0 {
return errors.New("no junk mails have yet been learned")
log.Warning("no junk mails have yet been learned")
return nil
}
return nil
@ -137,27 +139,37 @@ func classificationWord(db *bolt.DB, word string) (g float64, err error) {
// decides whether it is junk and -- if so -- moves it to the Junk folder. If
// it is not junk, the mail is untouched so it can be handled by the mail
// client.
func (m *Mail) Classify(db *bolt.DB) error {
func (m *Mail) Classify(db *bolt.DB, dir Maildir) (err error) {
m.New = true
err = m.Load(dir)
if err != nil {
return err
}
list, err := m.cleanWordlist()
if err != nil {
return err
}
junk, _, err := Junk(db, list)
junk, prob, err := Junk(db, list)
if err != nil {
return err
}
m.Junk = junk
log.WithFields(log.Fields{
"mail": m.Key,
"junk": m.Junk,
"mail": m.Key,
"junk": m.Junk,
"probability": prob,
"dir": string(dir),
}).Info("Classified")
// Move mail around if junk.
if junk {
m.Junk = junk
err := os.Rename("./new/"+m.Key, "./.Junk/cur/"+m.Key)
err := os.Rename(string(dir)+"/new/"+m.Key, string(dir)+"/.Junk/cur/"+m.Key)
if err != nil {
return err
}
@ -175,6 +187,9 @@ func (m *Mail) Classify(db *bolt.DB) error {
func Junk(db *bolt.DB, wordlist []string) (junk bool, prob float64, err error) {
var probabilities []float64
// initial value should be no junk
prob = 1.0
for _, val := range wordlist {
var p float64
p, err = classificationWord(db, val)

@ -35,7 +35,7 @@ var _ = Describe("Classify Mails", func() {
err = m.Load("test/Maildir")
Ω(err).ShouldNot(HaveOccurred())
err = m.Learn(dbs["test/Maildir"])
err = m.Learn(dbs["test/Maildir"], "test/Maildir")
Ω(err).ShouldNot(HaveOccurred())
// Load good mail
@ -46,7 +46,7 @@ var _ = Describe("Classify Mails", func() {
err = m.Load("test/Maildir")
Ω(err).ShouldNot(HaveOccurred())
err = m.Learn(dbs["test/Maildir"])
err = m.Learn(dbs["test/Maildir"], "test/Maildir")
Ω(err).ShouldNot(HaveOccurred())
})
AfterEach(func() {

@ -56,7 +56,7 @@ func (p Pidfile) DaemonStart() {
if err != nil {
log.WithFields(log.Fields{
"err": err,
}).Warning("Save process ID file")
}).Error("Save process ID file")
}
log.WithFields(log.Fields{
"pidfile": p,
@ -96,7 +96,7 @@ func (p Pidfile) DaemonStop() {
// remove PID file
err = os.Remove(string(p))
if err != nil {
log.Warning("Unable to remove process ID file")
log.Error("Unable to remove process ID file")
}
log.WithFields(log.Fields{

@ -10,7 +10,7 @@ import (
func openDB(m Maildir) (db *bolt.DB, err error) {
log.WithFields(log.Fields{
"maildir": m,
"dir": string(m),
}).Info("Loading database")
// Open the sisyphus.db data file in your current directory.
// It will be created if it doesn't exist.

20
glide.lock generated

@ -1,8 +1,10 @@
hash: d1a72ce9b2ed6346e486f11d8cc0a39e4ca4243573efd7bdf5225bbbbc1b1706
updated: 2017-05-25T20:16:35.853054558Z
hash: 1c42a5aaeffd2a78c3984ac595ba68a545cd83b7ebdcd965351f8556ab94865d
updated: 2017-06-04T20:35:45.682023296Z
imports:
- name: github.com/boltdb/bolt
version: 583e8937c61f1af6513608ccc75c97b6abdf4ff9
- name: github.com/carlostrub/maildir
version: 87290f3144a16d94b0c948cfbb38f5097a9fd6f3
- name: github.com/fsnotify/fsnotify
version: 4da3e2cfbabc9f751898f250b49f2439785783a1
- name: github.com/gonum/blas
@ -31,22 +33,20 @@ imports:
version: 42a372f1f7b88cf8df268560cf6e21e6e249ce10
- name: github.com/kennygrant/sanitize
version: 6a0bfdde8629a3a3a7418a7eae45c54154692514
- name: github.com/luksen/maildir
version: 5297d9c3091c7d4891c9d4f6fa743d500c038d6f
- name: github.com/numbleroot/maildir
version: 7c13ca37771042775818e2a384098f0844575b32
- name: github.com/retailnext/hllpp
version: 9fdfea05b3e55bebe7beb22d16c7db15d46cd518
- name: github.com/sirupsen/logrus
version: ba1b36c82c5e05c4f912a88eab0dcd91a171688f
- name: github.com/urfave/cli
version: d70f47eeca3afd795160003bc6e28b001d60c67c
- name: golang.org/x/net
version: 7dcfb8076726a3fdd9353b6b8a1f1b6be6811bd6
version: e4fa1c5465ad6111f206fc92186b8c83d64adbe1
subpackages:
- html
- html/atom
- html/charset
- name: golang.org/x/sys
version: a55a76086885b80f79961eacb876ebd8caf3868d
version: b90f89a1e7a9c1f6b918820b3daa7f08488c8594
subpackages:
- unix
- name: gopkg.in/urfave/cli.v2
@ -73,7 +73,7 @@ testImports:
- reporters/stenographer/support/go-isatty
- types
- name: github.com/onsi/gomega
version: 00acfa9d92a386415bd235ab069c52063f925998
version: 39a54bd3c3bbfe1c331a9b3207e92134c77ed812
subpackages:
- format
- internal/assertion
@ -87,7 +87,7 @@ testImports:
- matchers/support/goraph/util
- types
- name: golang.org/x/text
version: 19e51611da83d6be54ddafce4a4af510cb3e9ea4
version: ccbd3f7822129ff389f8ca4858a9b9d4d910531c
subpackages:
- encoding
- encoding/charmap

@ -1,7 +1,6 @@
package: github.com/carlostrub/sisyphus
import:
- package: github.com/kennygrant/sanitize
- package: github.com/luksen/maildir
- package: github.com/boltdb/bolt
version: 1.3.x
- package: github.com/fsnotify/fsnotify
@ -11,6 +10,8 @@ import:
version: ^0.11.5
- package: gopkg.in/urfave/cli.v2
version: 1.19.x
- package: github.com/numbleroot/maildir
- package: github.com/carlostrub/maildir
testImport:
- package: github.com/onsi/ginkgo
- package: github.com/onsi/gomega

@ -71,12 +71,18 @@ func (m *Mail) learnStatistics(db *bolt.DB) error {
}
// Learn adds the mail key to the list of words using the HyperLogLog algorithm.
func (m *Mail) Learn(db *bolt.DB) error {
func (m *Mail) Learn(db *bolt.DB, dir Maildir) (err error) {
log.WithFields(log.Fields{
"dir": string(dir),
"mail": m.Key,
}).Info("Learn mail")
err = m.Load(dir)
if err != nil {
return err
}
list, err := m.cleanWordlist()
if err != nil {
return err

@ -51,7 +51,7 @@ var _ = Describe("Learn", func() {
It("Load databases for a maildir, then learn a mail and check whether the word counts are correct in the db", func() {
err = m.Learn(dbs["test/Maildir"])
err = m.Learn(dbs["test/Maildir"], "test/Maildir")
Ω(err).ShouldNot(HaveOccurred())
var jN, sN, gN int
@ -77,7 +77,7 @@ var _ = Describe("Learn", func() {
It("Load databases for a maildir, then learn a mail and check whether individual word counts are equal to 1", func() {
err = m.Learn(dbs["test/Maildir"])
err = m.Learn(dbs["test/Maildir"], "test/Maildir")
Ω(err).ShouldNot(HaveOccurred())
var wordCount uint64

@ -5,14 +5,15 @@ import (
"errors"
"math"
"mime/quotedprintable"
"net/mail"
"os"
"regexp"
"strings"
log "github.com/sirupsen/logrus"
"github.com/carlostrub/maildir"
"github.com/kennygrant/sanitize"
"github.com/luksen/maildir"
)
// Maildir represents the address to a Maildir directory
@ -80,12 +81,19 @@ func (d Maildir) Index() (m []*Mail, err error) {
}
// Load reads a mail's subject and body
func (m *Mail) Load(d string) error {
func (m *Mail) Load(dir Maildir) (err error) {
if m.Junk {
d = d + "/.Junk"
var message *mail.Message
message = new(mail.Message)
switch {
case m.Junk:
dir = dir + Maildir("/.Junk")
case m.New:
dir = dir + Maildir("/new")
}
message, err := maildir.Dir(d).Message(m.Key)
message, err = maildir.Dir(dir).Message(m.Key)
if err != nil {
return err
}

@ -144,11 +144,12 @@ func main() {
db := dbs[d]
m := mails[d]
for _, val := range m {
err := val.Learn(db)
err := val.Learn(db, d)
if err != nil {
log.WithFields(log.Fields{
"err": err,
}).Warning("Cannot learn mail")
"err": err,
"mail": val.Key,
}).Error("Cannot learn mail")
}
}
}
@ -168,22 +169,23 @@ func main() {
select {
case event := <-watcher.Events:
if event.Op&fsnotify.Create == fsnotify.Create {
mailName := strings.Split(event.Name, "/")
mailDir := strings.TrimRight(event.Name, "/new/"+mailName[len(mailName)-1])
path := strings.Split(event.Name, "/new/")
m := sisyphus.Mail{
Key: mailName[len(mailName)-1],
Key: path[1],
}
err = m.Classify(dbs[sisyphus.Maildir(mailDir)])
err = m.Classify(dbs[sisyphus.Maildir(path[0])], sisyphus.Maildir(path[0]))
if err != nil {
log.Print(err)
log.WithFields(log.Fields{
"err": err,
}).Error("Classify mail")
}
}
case err := <-watcher.Errors:
log.WithFields(log.Fields{
"err": err,
}).Warning("Problem with directory watcher")
}).Error("Problem with directory watcher")
}
}
}()

Loading…
Cancel
Save