mirror of https://github.com/carlostrub/sisyphus
start loading mails and classifying them
parent
1694b0b35d
commit
da427ae069
@ -1,6 +1,8 @@
|
|||||||
hash: 0fbdb475136d5d29feb99b15dafd533580a8bbbf4fcfd801ddeeeb24e9a840c0
|
hash: d18ff656d998425dece8d9d7fbdf5e7c0dd9a2a41b0fc791d55b242d1f8915e7
|
||||||
updated: 2017-02-20T22:35:31.503897743Z
|
updated: 2017-02-22T20:04:09.761903217Z
|
||||||
imports:
|
imports:
|
||||||
|
- name: github.com/jbrukh/bayesian
|
||||||
|
version: bf3f261f9a9c61145c60d47665b0518cc32c774f
|
||||||
- name: github.com/luksen/maildir
|
- name: github.com/luksen/maildir
|
||||||
version: 5297d9c3091c7d4891c9d4f6fa743d500c038d6f
|
version: 5297d9c3091c7d4891c9d4f6fa743d500c038d6f
|
||||||
testImports: []
|
testImports: []
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
package: github.com/carlostrub/sisyphus
|
package: github.com/carlostrub/sisyphus
|
||||||
import:
|
import:
|
||||||
- package: github.com/luksen/maildir
|
- package: github.com/luksen/maildir
|
||||||
|
- package: github.com/jbrukh/bayesian
|
||||||
|
@ -1,42 +1,95 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"bufio"
|
||||||
"log"
|
"log"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/jbrukh/bayesian"
|
||||||
"github.com/luksen/maildir"
|
"github.com/luksen/maildir"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
// good is the class of good mails that are not supposed to be Spam
|
||||||
|
good bayesian.Class = "Good"
|
||||||
|
// junk is the class of Spam mails
|
||||||
|
junk bayesian.Class = "Junk"
|
||||||
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
// Maildirs holds a set of mail directories to handle.
|
// Maildir holds the path of the mail directory to handle.
|
||||||
Maildirs []string
|
Maildir = "/usr/home/cs/Maildir.TEST"
|
||||||
|
|
||||||
|
// processed is a map of e-mail IDs and true if processed already.
|
||||||
|
processed map[string]bool
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
// Mails contains the keys of all mails in the .Junk/cur and cur directories.
|
||||||
Maildirs = []string{"/usr/home/cs/Maildir.TEST"}
|
type Mails struct {
|
||||||
|
Junk, Good []string
|
||||||
|
}
|
||||||
|
|
||||||
|
// Classifiers contains the classifiers for mail subjects and bodies
|
||||||
|
type Classifiers struct {
|
||||||
|
Subject, Body *bayesian.Classifier
|
||||||
|
}
|
||||||
|
|
||||||
|
// LoadMails loads all mail keys from the Maildir directory for processing.
|
||||||
|
func LoadMails() (m Mails, err error) {
|
||||||
|
|
||||||
|
m.Junk, err = maildir.Dir(Maildir + "/.Junk").Keys()
|
||||||
|
if err != nil {
|
||||||
|
return m, err
|
||||||
|
}
|
||||||
|
|
||||||
|
m.Good, err = maildir.Dir(Maildir).Keys()
|
||||||
|
if err != nil {
|
||||||
|
return m, err
|
||||||
|
}
|
||||||
|
|
||||||
var err error
|
return m, nil
|
||||||
var Bad, Good []string
|
}
|
||||||
|
|
||||||
|
// Learn initially classifies all mails and returns the respective classifiers.
|
||||||
|
func (m Mails) Learn() (c Classifiers, err error) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func cleanText(t string) (c string, err error) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// getContent reads mails' subjects and bodies and returns the respective
|
||||||
|
// slices of space-separated words.
|
||||||
|
func getContent(keys []string) (s, b []string, err error) {
|
||||||
|
for _, k := range keys {
|
||||||
|
|
||||||
for _, dir := range Maildirs {
|
message, err := maildir.Dir(Maildir).Message(k)
|
||||||
var keysBad, keysGood []string
|
|
||||||
keysBad, err = maildir.Dir(dir + "/.Junk").Keys()
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatal(err)
|
return s, b, err
|
||||||
}
|
}
|
||||||
|
|
||||||
Bad = append(Bad, keysBad...)
|
// get Subject
|
||||||
|
subject := message.Header.Get("Subject")
|
||||||
|
s = append(s, strings.Split(subject, " ")...)
|
||||||
|
|
||||||
keysGood, err = maildir.Dir(dir).Keys()
|
// get Body
|
||||||
if err != nil {
|
bScanner := bufio.NewScanner(message.Body)
|
||||||
log.Fatal(err)
|
for bScanner.Scan() {
|
||||||
|
b = append(b, strings.Split(bScanner.Text(), " ")...)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return s, b, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
|
||||||
Good = append(Good, keysGood...)
|
_, err := LoadMails()
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Println("Bad guys:")
|
// Create a classifier
|
||||||
fmt.Println(Bad)
|
//classifier := bayesian.NewClassifier(Good, Junk)
|
||||||
fmt.Println("Good guys:")
|
|
||||||
fmt.Println(Good)
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue