Begin handling mails (work in progress, far from done)

master
Carlo Strub 7 years ago
parent a5e7b729db
commit b887d7dd46

1
.gitignore vendored

@ -24,3 +24,4 @@ _testmain.go
*.test
*.prof
sisyphus
sisyphus.db

@ -51,7 +51,7 @@ func openDB(maildir string) (db *bolt.DB, err error) {
return db, err
}
// Create DB bucket for Junk inside bucket Wordlists
// Create DB bucket for Good inside bucket Wordlists
err = db.Update(func(tx *bolt.Tx) error {
b := tx.Bucket([]byte("Wordlists"))
_, err := b.CreateBucketIfNotExists([]byte("Good"))

48
glide.lock generated

@ -1,58 +1,16 @@
hash: 1d7fb4b49ab9cebe25e4d605b7256849d2fb5550372e53dcf1a8dc030d32fce0
updated: 2017-03-11T20:05:25.966344527Z
hash: ad57db461a149fadda777f61bafa73ed65c46ef4f18e9f86b7e282070ea587e2
updated: 2017-03-15T21:05:02.415303554Z
imports:
- name: github.com/boltdb/bolt
version: 583e8937c61f1af6513608ccc75c97b6abdf4ff9
- name: github.com/jbrukh/bayesian
version: bf3f261f9a9c61145c60d47665b0518cc32c774f
- name: github.com/kennygrant/sanitize
version: 6a0bfdde8629a3a3a7418a7eae45c54154692514
- name: github.com/luksen/maildir
version: 5297d9c3091c7d4891c9d4f6fa743d500c038d6f
- name: github.com/urfave/cli
version: 9e5b04886c4bfee2ceba1465b8121057355c4e53
- name: golang.org/x/net
version: a6577fac2d73be281a500b310739095313165611
subpackages:
- html
- html/atom
- name: golang.org/x/sys
version: 99f16d856c9836c42d24e7ab64ea72916925fa97
subpackages:
- unix
testImports:
- name: github.com/onsi/ginkgo
version: ab07225d112dc7a93c289ac5b2e12735c2c46035
subpackages:
- config
- internal/codelocation
- internal/containernode
- internal/failer
- internal/leafnodes
- internal/remote
- internal/spec
- internal/specrunner
- internal/suite
- internal/testingtproxy
- internal/writer
- reporters
- reporters/stenographer
- reporters/stenographer/support/go-colorable
- reporters/stenographer/support/go-isatty
- types
version: 5ca121185e255e5041d7727d77992618545a93d2
- name: github.com/onsi/gomega
version: 1de7ab2df9105aa5c15c4d7e14a8a514e3cb8d4b
subpackages:
- format
- internal/assertion
- internal/asyncassertion
- internal/oraclematcher
- internal/testingtsupport
- matchers
- matchers/support/goraph/bipartitegraph
- matchers/support/goraph/edge
- matchers/support/goraph/node
- matchers/support/goraph/util
- types
- name: gopkg.in/yaml.v2
version: a3f3340b5840cee44f372bddb5880fcbc419b46a

@ -1,6 +1,5 @@
package: github.com/carlostrub/sisyphus
import:
- package: github.com/jbrukh/bayesian
- package: github.com/kennygrant/sanitize
- package: github.com/luksen/maildir
- package: github.com/boltdb/bolt

@ -3,6 +3,7 @@ package main
import (
"bufio"
"errors"
"math"
"mime/quotedprintable"
"regexp"
"strings"
@ -19,26 +20,20 @@ type Mail struct {
}
// Index loads all mail keys from the Maildir directory for processing.
func Index(d string) (m []*Mail, err error) {
func Index(d string, junk bool) (m []*Mail, err error) {
g, err := maildir.Dir(d).Keys()
if err != nil {
return m, err
if junk {
j, err := maildir.Dir(d + "/.Junk").Keys()
} else {
j, err := maildir.Dir(d).Keys()
}
for _, val := range g {
var new Mail
new.Key = val
m = append(m, &new)
}
j, err := maildir.Dir(d + "/.Junk").Keys()
if err != nil {
return m, err
}
for _, val := range j {
var new Mail
new.Key = val
new.Junk = true
new.Junk = junk
m = append(m, &new)
}
@ -63,7 +58,6 @@ func cleanString(i string) (s string, err error) {
s = strings.Replace(s, "charset", " ", -1)
s = strings.Replace(s, "content-transfer-encoding", " ", -1)
s = strings.Replace(s, "content-type", " ", -1)
s = strings.Replace(s, "cp-850", " ", -1)
s = strings.Replace(s, "image/jpeg", " ", -1)
s = strings.Replace(s, "multipart/alternative", " ", -1)
s = strings.Replace(s, "multipart/related", " ", -1)
@ -119,24 +113,32 @@ func wordlist(s string) (l []string, err error) {
list := make(map[string]int)
raw := strings.Split(s, " ")
var clean []string
for _, i := range raw {
for _, w := range raw {
// no long or too short words
length := len(i)
length := len(w)
if length < 4 || length > 10 {
continue
}
// no numbers, special characters, etc. -- only words
match, _ := regexp.MatchString("(^[a-z]+$)", i)
match, _ := regexp.MatchString("(^[a-z]+$)", w)
if !match {
continue
} else {
list[i]++
clean = append(clean, w)
}
}
// only the first 200 words count
maxWords := int(math.Min(200, float64(len(clean))))
for i := 0; i < maxWords; i++ {
w := clean[i]
list[w]++
}
for word, count := range list {
if count > 10 {
continue

@ -162,7 +162,7 @@ var _ = Describe("Mail", func() {
Ω(err).ShouldNot(HaveOccurred())
subjectOutput := "hello"
bodyOutput := " ------ 000 0032 01d2912f.05324bc6 : ; : dear cs we are looking for employees working remotely my name is kari i am the personnel manager of a large international company most of the work you can do from home that is at a distance salary is 2000- 5300 if you are interested in this offer please visit our site best regards ------ 000 0032 01d2912f.05324bc6 : ; : dear cs we are looking for employees working remotely. my name is kari i am the personnel manager of a large international company. most of the work you can do from home that is at a distance. salary is 2000- 5300. if you are interested in this offer please visit our site best regards ------ 000 0032 01d2912f.05324bc6-- "
bodyOutput := " ------ 000 0032 01d2912f.05324bc6 : ; cp-850 : dear cs we are looking for employees working remotely my name is kari i am the personnel manager of a large international company most of the work you can do from home that is at a distance salary is 2000- 5300 if you are interested in this offer please visit our site best regards ------ 000 0032 01d2912f.05324bc6 : ; cp-850 : dear cs we are looking for employees working remotely. my name is kari i am the personnel manager of a large international company. most of the work you can do from home that is at a distance. salary is 2000- 5300. if you are interested in this offer please visit our site best regards ------ 000 0032 01d2912f.05324bc6-- "
Ω(m).Should(Equal(
s.Mail{
Key: "1488226337.M327822P8269.mail.carlostrub.ch,S=3620,W=3730",

@ -8,20 +8,16 @@ import (
"os/signal"
"syscall"
"github.com/boltdb/bolt"
"github.com/urfave/cli"
)
var (
// Processed is a map of e-mail IDs and the value set to true if Junk
// Processed map[string]bool
const (
good = "0"
junk = "1"
)
func main() {
// Get working directory
wd, err := os.Getwd()
if err != nil {
panic(err)
}
// Define App
app := cli.NewApp()
@ -43,9 +39,7 @@ func main() {
},
}
maildirPaths := cli.StringSlice([]string{
wd + "/Maildir",
})
maildirPaths := cli.StringSlice([]string{})
var pidfile *string
pidfile = new(string)
@ -81,14 +75,16 @@ func main() {
fmt.Print(`
`)
`)
// Make arrangement to remove PID file upon receiving the SIGTERM from kill command
ch := make(chan os.Signal, 1)
signal.Notify(ch, os.Interrupt, os.Kill, syscall.SIGTERM)
@ -108,25 +104,67 @@ func main() {
os.Exit(0)
}()
// var maildir []string
// if maildir == nil {
// return errors.New("no maildir selected")
// }
//
// // Load the Maildir
// mails, err := Index(maildirPaths[0])
// if err != nil {
// return cli.NewExitError(err, 66)
// }
//
// fmt.Println(mails)
//
// // Open the database
// db, err := openDB(maildirPaths[0])
// if err != nil {
// return cli.NewExitError(err, 66)
// }
// defer db.Close()
// Load the Maildir
if len(maildirPaths) < 1 {
log.Fatal("No Maildir set.")
}
if len(maildirPaths) > 1 {
log.Fatal("Sorry... only one Maildir supported as of today.")
}
log.Println("loading mails")
mailsGood, err := Index(maildirPaths[0], false)
if err != nil {
log.Fatal("Wrong path to Maildir")
}
log.Println("good mails loaded")
os.MkdirAll(maildirPaths[0]+"/.Junk/cur", 0700)
mailsJunk, err := Index(maildirPaths[0], true)
if err != nil {
log.Fatal("Wrong path to Maildir")
}
log.Println("junk mails loaded")
// Open the database
log.Println("loading database")
db, err := openDB(maildirPaths[0])
if err != nil {
log.Fatal(err)
}
defer db.Close()
log.Println("database loaded")
// Check for unprocessed mail
var unprocessedJunk, unprocessedGood []string
for i := range mailsGood {
db.View(func(tx *bolt.Tx) error {
b := tx.Bucket([]byte("Processed"))
v := b.Get([]byte(mails[i].Key))
if len(v) == 0 {
unprocessedGood = append(unprocessedGood, mails[i].Key)
}
if string(v) == junk {
unprocessedGood = append(unprocessedGood, mails[i].Key)
}
return nil
})
}
for i := range mailsJunk {
db.View(func(tx *bolt.Tx) error {
b := tx.Bucket([]byte("Processed"))
v := b.Get([]byte(mails[i].Key))
if len(v) == 0 {
unprocessedJunk = append(unprocessedJunk, mails[i].Key)
}
if string(v) == good {
unprocessedJunk = append(unprocessedJunk, mails[i].Key)
}
return nil
})
}
// Classify and learn unprocessed mail
mux := http.NewServeMux()
log.Fatalln(http.ListenAndServe(":8080", mux))

Loading…
Cancel
Save