From b6537dbcea2d4e4930e381786b9d812612e8702d Mon Sep 17 00:00:00 2001
From: Urban Guacamole
Date: Mon, 20 Jul 2020 15:57:59 +0200
Subject: [PATCH] Add import-magnetico-db

For importing https://github.com/boramalper/magnetico/issues/218
---
Review notes: main.go now rejects a missing file argument, skips malformed
or short CSV records instead of indexing past the end of them, aborts on any
insert error other than a duplicate key, and parses the timestamp as 64-bit.
The main.go blob id on the index line is carried over from the pre-review
patch.

 .gitignore                  |   7 +-
 import-magnetico-db/main.go | 123 ++++++++++++++++++++++++++++++++++++
 snippets.sql                |   4 +-
 3 files changed, 132 insertions(+), 2 deletions(-)
 create mode 100644 import-magnetico-db/main.go

diff --git a/.gitignore b/.gitignore
index e23f8f0..e5c82c9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,8 +4,13 @@ dump.csv
 ipfs.html
 about.html
 copyright.html
-torrent_dump_full.csv.gz
+why.html
 generate-top-torrents
+import-magnetico-db/import-magnetico-db
+crawl-rss/crawl-rss
+seedleech-daemon/seedleech-daemon
+spider/spider
+.vscode/
 api/api
 *.txt
 vote-and-donate.html
\ No newline at end of file
diff --git a/import-magnetico-db/main.go b/import-magnetico-db/main.go
new file mode 100644
index 0000000..67f6476
--- /dev/null
+++ b/import-magnetico-db/main.go
@@ -0,0 +1,123 @@
+// Command import-magnetico-db loads a CSV export of a magnetico database into
+// the torrent table in PostgreSQL.
+package main
+
+import (
+	"database/sql"
+	"encoding/csv"
+	"io"
+	"log"
+	"os"
+	"strconv"
+	"time"
+	"unicode/utf8"
+
+	"github.com/lib/pq"
+)
+
+// uniqueViolation is the PostgreSQL error code for unique_violation; here it
+// means a row with the same infohash primary key is already in the table.
+const uniqueViolation = "23505"
+
+// main reads the CSV file named by the first command-line argument and inserts
+// one torrent row per record. Each record must hold at least four fields:
+// infohash, name, length and the added time as Unix seconds.
+func main() {
+	if len(os.Args) < 2 {
+		log.Fatalf("usage: %s <csv-file>", os.Args[0])
+	}
+	f, err := os.Open(os.Args[1])
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer f.Close()
+
+	db := initDb()
+	defer db.Close()
+
+	cr := csv.NewReader(f)
+	cr.LazyQuotes = true
+
+	for {
+		line, err := cr.Read()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			perr, ok := err.(*csv.ParseError)
+			if !ok {
+				// Not a per-record problem (e.g. a read failure); retrying
+				// would return the same error forever.
+				log.Fatal(err)
+			}
+			log.Println(err)
+			if perr.Err != csv.ErrFieldCount {
+				continue
+			}
+			// On ErrFieldCount the reader still returns the record, so it
+			// can be imported if it has enough fields.
+		}
+		if len(line) < 4 {
+			log.Printf("skipping record with %d fields, need 4", len(line))
+			continue
+		}
+
+		infohash := line[0]
+		if len(infohash) != 40 {
+			log.Fatal("bad infohash length " + infohash)
+		}
+
+		name := line[1]
+		if len(name) < 2 {
+			log.Println("bad name length " + name)
+			continue
+		}
+		if !utf8.ValidString(name) {
+			log.Println("utf8 invalid name")
+			log.Println(name)
+			continue
+		}
+
+		// length is passed to the query as the raw CSV text.
+		length := line[2]
+
+		// bitSize 64 so timestamps parse the same on 32-bit platforms.
+		addedUnix, err := strconv.ParseInt(line[3], 10, 64)
+		if err != nil {
+			log.Fatal(err)
+		}
+		added := time.Unix(addedUnix, 0)
+
+		_, err = db.Exec("INSERT INTO torrent (infohash, name, length, added) VALUES ($1, $2, $3, $4)", infohash, name, length, added)
+		if err != nil {
+			// A duplicate infohash is skipped so the import can be re-run;
+			// anything else (including non-PostgreSQL errors) aborts it.
+			if pqErr, ok := err.(*pq.Error); ok && pqErr.Code == uniqueViolation {
+				continue
+			}
+			log.Fatal(err)
+		}
+	}
+}
+
+// initDb opens a handle to the nextgen PostgreSQL database using a hard-coded
+// connection string and creates the torrent table if it does not exist yet.
+// Any failure terminates the program.
+func initDb() *sql.DB {
+	connStr := "user=nextgen dbname=nextgen host=/var/run/postgresql"
+	db, err := sql.Open("postgres", connStr)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	_, err = db.Exec(`CREATE TABLE IF NOT EXISTS torrent (
+		infohash char(40) PRIMARY KEY NOT NULL,
+		name varchar NOT NULL,
+		length bigint,
+		added timestamp DEFAULT current_timestamp
+	)`)
+	if err != nil {
+		log.Fatal(err)
+	}
+	return db
+}
diff --git a/snippets.sql b/snippets.sql
index a298cfe..4287e45 100644
--- a/snippets.sql
+++ b/snippets.sql
@@ -28,4 +28,6 @@ CREATE MATERIALIZED VIEW search AS select torrent.*, fresh.s as s, fresh.l as l,
 create index vect_inx on search using gin(vect);
 create unique index uniq_ih on search (infohash);
 REFRESH MATERIALIZED VIEW fresh;
-REFRESH MATERIALIZED VIEW search CONCURRENTLY;
\ No newline at end of file
+REFRESH MATERIALIZED VIEW CONCURRENTLY search;
+
+CREATE INDEX "fetch_work_for_seedleech" on trackerdata (tracker, seeders, scraped);