mirror of https://github.com/carlostrub/sisyphus
implement learning and classifying using Bayes' rule and HyperLogLog
data structures -- still a way to go though.
master
parent
0497999a15
commit
b5b3792efe
@ -1,21 +1,15 @@
|
||||
[
|
||||
{"linter":"gas","severity":"warning","path":"bayesian.go","line":18,"col":0,"message":"Errors unhandled.,LOW,HIGH"},
|
||||
{"linter":"gas","severity":"warning","path":"bayesian.go","line":38,"col":0,"message":"Errors unhandled.,LOW,HIGH"},
|
||||
{"linter":"gas","severity":"warning","path":"bayesian.go","line":42,"col":0,"message":"Errors unhandled.,LOW,HIGH"},
|
||||
{"linter":"gas","severity":"warning","path":"bayesian.go","line":45,"col":0,"message":"Errors unhandled.,LOW,HIGH"},
|
||||
{"linter":"gas","severity":"warning","path":"bayesian.go","line":50,"col":0,"message":"Errors unhandled.,LOW,HIGH"},
|
||||
{"linter":"gas","severity":"warning","path":"daemon.go","line":45,"col":0,"message":"Subprocess launching with variable.,HIGH,HIGH"},
|
||||
{"linter":"gas","severity":"warning","path":"daemon.go","line":115,"col":0,"message":"Subprocess launching with variable.,HIGH,HIGH"},
|
||||
{"linter":"gas","severity":"warning","path":"daemon.go","line":122,"col":0,"message":"Subprocess launching with variable.,HIGH,HIGH"},
|
||||
{"linter":"gocyclo","severity":"warning","path":"mail.go","line":232,"col":0,"message":"cyclomatic complexity 16 of function (*Mail).Classify() is high (\u003e 10)"},
|
||||
{"linter":"gas","severity":"warning","path":"mail.go","line":168,"col":0,"message":"Errors unhandled.,LOW,HIGH"},
|
||||
{"linter":"dupl","severity":"warning","path":"mail_test.go","line":135,"col":0,"message":"duplicate of mail_test.go:160-183"},
|
||||
{"linter":"dupl","severity":"warning","path":"mail_test.go","line":160,"col":0,"message":"duplicate of mail_test.go:185-208"},
|
||||
{"linter":"dupl","severity":"warning","path":"mail_test.go","line":185,"col":0,"message":"duplicate of mail_test.go:210-233"},
|
||||
{"linter":"dupl","severity":"warning","path":"mail_test.go","line":210,"col":0,"message":"duplicate of mail_test.go:235-258"},
|
||||
{"linter":"dupl","severity":"warning","path":"mail_test.go","line":235,"col":0,"message":"duplicate of mail_test.go:260-283"},
|
||||
{"linter":"dupl","severity":"warning","path":"mail_test.go","line":260,"col":0,"message":"duplicate of mail_test.go:135-158"},
|
||||
{"linter":"errcheck","severity":"warning","path":"bayesian.go","line":18,"col":9,"message":"error return value not checked (db.View(func(tx *bolt.Tx) error {)"},
|
||||
{"linter":"errcheck","severity":"warning","path":"bayesian.go","line":38,"col":9,"message":"error return value not checked (db.View(func(tx *bolt.Tx) error {)"},
|
||||
{"linter":"errcheck","severity":"warning","path":"daemon.go","line":26,"col":18,"message":"error return value not checked (defer file.Close())"},
|
||||
{"linter":"errcheck","severity":"warning","path":"mail.go","line":275,"col":11,"message":"error return value not checked (db.Update(func(tx *bolt.Tx) error {)"}
|
||||
{"linter":"errcheck","severity":"warning","path":"learn.go","line":37,"col":14,"message":"error return value not checked (bucket.Put([]byte(val), word.Marshal()))"},
|
||||
{"linter":"errcheck","severity":"warning","path":"learn.go","line":62,"col":8,"message":"error return value not checked (p.Put([]byte(key), counter.Marshal()))"}
|
||||
]
|
||||
|
@ -0,0 +1,83 @@
|
||||
package sisyphus
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"github.com/boltdb/bolt"
|
||||
"github.com/retailnext/hllpp"
|
||||
)
|
||||
|
||||
// Learn adds the the mail key to the list of words using hyper log log algorithm.
|
||||
func (m *Mail) Learn(db *bolt.DB) error {
|
||||
|
||||
log.Println("learn mail " + m.Key)
|
||||
|
||||
err := m.Clean()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
list := m.Wordlist()
|
||||
|
||||
wordKey := "Good"
|
||||
if m.Junk {
|
||||
wordKey = "Junk"
|
||||
}
|
||||
|
||||
// Learn words
|
||||
for _, val := range list {
|
||||
err = db.Update(func(tx *bolt.Tx) error {
|
||||
b := tx.Bucket([]byte("Wordlists"))
|
||||
|
||||
bucket := b.Bucket([]byte(wordKey))
|
||||
wordRaw := bucket.Get([]byte(val))
|
||||
var word *hllpp.HLLPP
|
||||
if len(wordRaw) == 0 {
|
||||
word = hllpp.New()
|
||||
} else {
|
||||
word, err = hllpp.Unmarshal(wordRaw)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
word.Add([]byte(m.Key))
|
||||
|
||||
err = bucket.Put([]byte(val), word.Marshal())
|
||||
|
||||
return err
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Update the statistics counter
|
||||
err = db.Update(func(tx *bolt.Tx) error {
|
||||
p := tx.Bucket([]byte("Statistics"))
|
||||
|
||||
key := "ProcessedGood"
|
||||
if m.Junk {
|
||||
key = "ProcessedJunk"
|
||||
}
|
||||
|
||||
keyRaw := p.Get([]byte(key))
|
||||
var counter *hllpp.HLLPP
|
||||
if len(keyRaw) == 0 {
|
||||
counter = hllpp.New()
|
||||
} else {
|
||||
counter, err = hllpp.Unmarshal(keyRaw)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
counter.Add([]byte(m.Key))
|
||||
|
||||
err = p.Put([]byte(key), counter.Marshal())
|
||||
|
||||
return err
|
||||
})
|
||||
|
||||
return err
|
||||
}
|
Loading…
Reference in New Issue