Browse Source

initial

master
Chakib Benziane 1 year ago
commit
a948d4bf69
54 changed files with 3965 additions and 0 deletions
  1. +16
    -0
      .gitignore
  2. +52
    -0
      Dockerfile
  3. +9
    -0
      Dockerfile-sqliteweb
  4. +22
    -0
      Makefile
  5. +47
    -0
      README.md
  6. +71
    -0
      api.go
  7. +188
    -0
      bitcoin/addresses.go
  8. +85
    -0
      commands.go
  9. +1
    -0
      config.toml
  10. +58
    -0
      config/config.go
  11. +72
    -0
      db/db.go
  12. +42
    -0
      docker-compose.yml
  13. +15
    -0
      docker-entrypoint.sh
  14. +58
    -0
      encoder/encoder.go
  15. +65
    -0
      export/btc_addresses.go
  16. +17
    -0
      export/bulletin.go
  17. +214
    -0
      export/export.go
  18. +18
    -0
      export/newsletters.go
  19. +14
    -0
      export/release.go
  20. +15
    -0
      export/rfc.go
  21. +56
    -0
      export/weeks.go
  22. +112
    -0
      feed_commands.go
  23. +117
    -0
      feeds/ctrl.go
  24. +262
    -0
      feeds/feed.go
  25. +9
    -0
      feeds/form.go
  26. +26
    -0
      filters/filters.go
  27. +65
    -0
      filters/mailchimp_filter.go
  28. +21
    -0
      github/auth.go
  29. +75
    -0
      github/github.go
  30. +35
    -0
      go.mod
  31. +110
    -0
      go.sum
  32. +36
    -0
      handlers/handlers.go
  33. +200
    -0
      handlers/releases.go
  34. +226
    -0
      handlers/rfc.go
  35. +118
    -0
      handlers/rss.go
  36. +328
    -0
      jobs.go
  37. +29
    -0
      logging/log.go
  38. +96
    -0
      main.go
  39. +36
    -0
      parse_test.go
  40. +176
    -0
      posts/posts.go
  41. +28
    -0
      posts_test.go
  42. +379
    -0
      scheduler.go
  43. +62
    -0
      server.go
  44. +29
    -0
      static/bolts.go
  45. +32
    -0
      static/data.go
  46. +38
    -0
      types/json.go
  47. +28
    -0
      types/stringlist.go
  48. +9
    -0
      utils/integers.go
  49. +18
    -0
      utils/map.go
  50. +16
    -0
      utils/paths.go
  51. +14
    -0
      utils/print.go
  52. +25
    -0
      utils/shortid.go
  53. +49
    -0
      utils/time.go
  54. +26
    -0
      utils/time_test.go

+ 16
- 0
.gitignore View File

@ -0,0 +1,16 @@
# ---> Go
# Test binary, build with `go test -c`
*.test
# Output of the go coverage tool, specifically when used with LiteIDE
*.out
*.log
# Binary
hugobot
# Sqlite
*.sqlite-*

+ 52
- 0
Dockerfile View File

@ -0,0 +1,52 @@
# Build stage: compiles the hugobot binary with the Go toolchain.
FROM golang:1.11-alpine as builder
# MAINTAINER is deprecated; the maintainer is recorded as an image label instead.
LABEL maintainer="Chakib <contact@bitcointechweekly.com>"
# Copy source
COPY . /go/src/hugobot
# install dependencies and build
RUN apk add --no-cache --upgrade \
    ca-certificates \
    git \
    openssh \
    make \
    alpine-sdk
RUN cd /go/src/hugobot \
    && make install
################################
#### FINAL IMAGE
###############################
# Runtime stage: only the binary and the tools used at run time.
FROM alpine as final
ENV WEBSITE_PATH=/website
ENV HUGOBOT_DB_PATH=/db
RUN apk add --no-cache --upgrade \
    ca-certificates \
    bash \
    sqlite \
    jq
COPY --from=builder /go/bin/hugobot /bin/
RUN mkdir -p ${HUGOBOT_DB_PATH}
RUN mkdir -p ${WEBSITE_PATH}
VOLUME ${HUGOBOT_DB_PATH}
# Expose API ports
EXPOSE 8734
# copy entrypoint
COPY "docker-entrypoint.sh" /entry
ENTRYPOINT ["/entry"]
CMD ["hugobot", "server"]

+ 9
- 0
Dockerfile-sqliteweb View File

@ -0,0 +1,9 @@
FROM coleifer/sqlite
RUN apk add --no-cache --virtual .build-reqs build-base gcc make \
&& pip install --no-cache-dir cython \
&& pip install --no-cache-dir flask peewee sqlite-web \
&& apk del .build-reqs
EXPOSE 8080
VOLUME /db
WORKDIR /db
CMD sqlite_web -H 0.0.0.0 -x $SQLITE_DATABASE -P

+ 22
- 0
Makefile View File

@ -0,0 +1,22 @@
TARGET=hugobot
GOINSTALL := GO111MODULE=on go install -v
GOBUILD := GO111MODULE=on go build -v
PKG := hugobot
.PHONY: all build install
all: build
build:
$(GOBUILD) -o $(TARGET)
install:
$(GOINSTALL)

+ 47
- 0
README.md View File

@ -0,0 +1,47 @@
**MIRRORED FROM**: https://git.sp4ke.com/sp4ke/hugobot
# HUGOBOT
*hugobot* is an automated content fetching and aggregation bot for [Hugo][hugo] data
driven websites. It has the following features:
## Data fetch
- Add feeds to the bot in the `feeds` sqlite table
- Currently handles these types of feeds: `RSS`, `Github Releases`, `Newsletters`
- Define your own feed types by implementing the `JobHandler` interface (see
`handlers/handlers.go`).
- Hugobot automatically fetches new posts from the feeds you defined
- It runs periodically to download new posts in the defined feeds.
- Everything is saved in an SQLite database
- The scheduler can handle any number of tasks and uses leveldb for
caching/resuming jobs.
## Hugo export
- Data is automatically exported to the configured Hugo website path.
- It can export `markdown` files or `json/toml` data files
- All fields in the exported files can be customized
- You can define custom output formats by using the `FormatHandler` interface.
## API
- *hugobot* also includes a webserver API that can be used with Hugo [Data
Driven Mode][data-driven].
- WIP: Insert and query data
- An example usage is the automated generation of Bitcoin addresses for new
articles on [bitcointechweekly.com][btw-btc]
## Sqliteweb interface
- See Docker files
[data-driven]:https://gohugo.io/templates/data-templates/#data-driven-content
[btw-btc]:https://bitcointechweekly.com/btc/3Jv15g4G5LDnBJPDh1e2ja8NPnADzMxhVh
[hugo]:https://gohugo.io

+ 71
- 0
api.go View File

@ -0,0 +1,71 @@
package main
import (
"io"
"os"
"strconv"
"git.sp4ke.com/sp4ke/hugobot/v3/feeds"
"git.sp4ke.com/sp4ke/hugobot/v3/config"
"git.sp4ke.com/sp4ke/hugobot/v3/bitcoin"
gum "git.sp4ke.com/sp4ke/gum.git"
"github.com/gin-gonic/gin"
)
// apiLogFile is the file NewApi opens to receive a copy of gin's log output.
var (
apiLogFile *os.File
)
// API wraps the gin engine on which hugobot's HTTP routes are registered.
type API struct {
router *gin.Engine
}
// Run registers the /feeds and /btc routes, starts serving them on the
// configured API port in a background goroutine, and then blocks until the
// unit manager signals a stop. A failure of the HTTP listener panics.
func (api *API) Run(m gum.UnitManager) {
	// Feed management endpoints.
	feedCtrl := &feeds.FeedCtrl{}
	feedsGroup := api.router.Group("/feeds")
	feedsGroup.POST("/", feedCtrl.Create)
	feedsGroup.DELETE("/:id", feedCtrl.Delete)
	feedsGroup.GET("/", feedCtrl.List) // Get all
	//feedsGroup.Get("/:id", feedCtrl.GetById) // Get one

	// Bitcoin address endpoint.
	btcGroup := api.router.Group("/btc")
	btcGroup.GET("/address", bitcoin.GetAddressCtrl)

	// Serve in the background so this goroutine can wait for the stop signal.
	go func() {
		listenAddr := ":" + strconv.Itoa(config.C.ApiPort)
		if err := api.router.Run(listenAddr); err != nil {
			panic(err)
		}
	}()

	// Wait for stop signal
	<-m.ShouldStop()

	// Shutdown, then report completion to the manager.
	api.Shutdown()
	m.Done()
}
// Shutdown is called by Run once the stop signal arrives; it currently does nothing.
func (api *API) Shutdown() {}
// NewApi returns an API backed by gin's default engine.
//
// Gin's log output is mirrored to the file ".api.log" and to stdout. When the
// log file cannot be created, logging falls back to stdout only: a nil
// *os.File placed first in a MultiWriter fails every write and stops the
// fan-out before stdout is reached, which would silence all API logs.
func NewApi() *API {
	var err error
	apiLogFile, err = os.Create(".api.log")
	if err != nil {
		gin.DefaultWriter = os.Stdout
	} else {
		gin.DefaultWriter = io.MultiWriter(apiLogFile, os.Stdout)
	}

	// gin.Default() must be called after DefaultWriter is set so that its
	// logger middleware picks up the writer chosen above.
	return &API{
		router: gin.Default(),
	}
}

+ 188
- 0
bitcoin/addresses.go View File

@ -0,0 +1,188 @@
package bitcoin
import (
"database/sql"
"log"
"net/http"
"git.sp4ke.com/sp4ke/hugobot/v3/db"
"github.com/gin-gonic/gin"
sqlite3 "github.com/mattn/go-sqlite3"
)
// DB is this package's reference to the shared database object from package db.
var DB = db.DB
// SQL statements for the btc_addresses table.
const (
// DBBTCAddressesSchema creates the btc_addresses table when it is missing.
DBBTCAddressesSchema = `CREATE TABLE IF NOT EXISTS btc_addresses (
addr_id INTEGER PRIMARY KEY,
address TEXT NOT NULL UNIQUE,
address_position INTEGER NOT NULL DEFAULT 0,
linked_article_title TEXT DEFAULT '',
linked_article_id TEXT NOT NULL DEFAULT '',
used INTEGER NOT NULL DEFAULT 0,
synced INTEGER NOT NULL DEFAULT 0
)`
// QueryUnusedAddress selects a single row whose used flag is 0.
QueryUnusedAddress = `SELECT * FROM btc_addresses WHERE used = 0 LIMIT 1 `
// UpdateAddressQuery sets the linked article id, title and used flag of
// the row identified by addr_id (parameters in that order).
UpdateAddressQuery = `UPDATE btc_addresses
SET linked_article_id = ?,
linked_article_title = ?,
used = ?
WHERE addr_id = ?
`
)
// BTCAddress mirrors one row of the btc_addresses table; the db tags name the
// matching columns.
type BTCAddress struct {
ID int64 `db:"addr_id"`
Address string `db:"address"`
AddrPosition int64 `db:"address_position"`
LinkedArticleTitle string `db:"linked_article_title"`
LinkedArticleID string `db:"linked_article_id"`
Used bool `db:"used"`
Synced bool `db:"synced"`
}
// SetSynced marks the address as synced, both on the receiver and in its
// btc_addresses row (matched by addr_id). It returns the database error, if any.
func (a *BTCAddress) SetSynced() error {
	a.Synced = true

	const stmt = `UPDATE btc_addresses SET synced = :synced WHERE addr_id = :addr_id`
	if _, err := DB.Handle.NamedExec(stmt, a); err != nil {
		return err
	}
	return nil
}
// GetAddressByPos loads the address whose address_position equals pos.
// The query error is returned unchanged when the lookup fails.
func GetAddressByPos(pos int) (*BTCAddress, error) {
	const stmt = "SELECT * FROM btc_addresses WHERE address_position = ?"

	found := new(BTCAddress)
	if err := DB.Handle.Get(found, stmt, pos); err != nil {
		return nil, err
	}
	return found, nil
}
// GetAddressByArticleID loads the address whose linked_article_id equals artId.
// The query error is returned unchanged when the lookup fails.
func GetAddressByArticleID(artId string) (*BTCAddress, error) {
	const stmt = "SELECT * FROM btc_addresses WHERE linked_article_id = ?"

	found := new(BTCAddress)
	if err := DB.Handle.Get(found, stmt, artId); err != nil {
		return nil, err
	}
	return found, nil
}
// GetAllUsedUnsyncedAddresses returns every address flagged as used but not
// yet flagged as synced.
func GetAllUsedUnsyncedAddresses() ([]*BTCAddress, error) {
	const stmt = "SELECT * FROM btc_addresses WHERE used = 1 AND synced = 0"

	var pending []*BTCAddress
	if err := DB.Handle.Select(&pending, stmt); err != nil {
		return nil, err
	}
	return pending, nil
}
// GetNextUnused loads one address that is not yet marked as used, using
// QueryUnusedAddress. The query error is returned unchanged on failure.
func GetNextUnused() (*BTCAddress, error) {
	free := new(BTCAddress)
	if err := DB.Handle.Get(free, QueryUnusedAddress); err != nil {
		return nil, err
	}
	return free, nil
}
// GetAddressForArticle returns the bitcoin address linked to the article
// artId, assigning the next unused address when the article has none yet.
//
// When the article already owns an address and artTitle differs from the
// stored title, the stored title is updated. Lookup failures other than
// "no matching row" are logged and returned, as are errors from fetching an
// unused address or from persisting the assignment.
func GetAddressForArticle(artId string, artTitle string) (*BTCAddress, error) {
	// Check if article already has an assigned address
	addr, err := GetAddressByArticleID(artId)

	// A failed type assertion on a nil err simply yields isSqliteErr == false.
	sqliteErr, isSqliteErr := err.(sqlite3.Error)
	if (isSqliteErr && sqliteErr.Code != sqlite3.ErrNotFound) ||
		(err != nil && !isSqliteErr && err != sql.ErrNoRows) {
		// Log the actual failure; a fixed placeholder string carries no information.
		log.Println(err)
		return nil, err
	}

	if err == nil {
		// If different title update it
		if artTitle != addr.LinkedArticleTitle {
			addr.LinkedArticleTitle = artTitle

			// Persist the refreshed title on the already assigned address
			_, err = DB.Handle.Exec(UpdateAddressQuery,
				addr.LinkedArticleID,
				addr.LinkedArticleTitle,
				addr.Used,
				addr.ID,
			)
			if err != nil {
				return nil, err
			}
		}
		return addr, nil
	}

	// Get next unused address
	addr, err = GetNextUnused()
	if err != nil {
		return nil, err
	}

	addr.LinkedArticleID = artId
	addr.LinkedArticleTitle = artTitle
	addr.Used = true

	// Store newly assigned address
	_, err = DB.Handle.Exec(UpdateAddressQuery,
		addr.LinkedArticleID,
		addr.LinkedArticleTitle,
		addr.Used,
		addr.ID,
	)
	if err != nil {
		return nil, err
	}
	return addr, nil
}
// GetAddressCtrl is the gin handler for GET /btc/address. It reads the
// articleId and articleTitle query parameters and answers with the bitcoin
// address assigned to that article.
//
// Responses:
//   - 200 {"status": 200, "addr": <address>} on success
//   - 400 {"status": 400, "error": <message>} when articleId is missing or
//     the address lookup/assignment fails
func GetAddressCtrl(c *gin.Context) {
	artId := c.Query("articleId")
	artTitle := c.Query("articleTitle")

	// Unused rows keep linked_article_id at its schema default '', so an empty
	// id would match an unused address and hand it out without marking it used.
	if artId == "" {
		c.JSON(http.StatusBadRequest,
			gin.H{"status": http.StatusBadRequest,
				"error": "missing articleId"})
		c.Abort()
		return
	}

	addr, err := GetAddressForArticle(artId, artTitle)
	if err != nil {
		c.JSON(http.StatusBadRequest,
			gin.H{"status": http.StatusBadRequest,
				"error": err.Error()})
		c.Abort()
		return
	}

	c.JSON(http.StatusOK, gin.H{
		"status": http.StatusOK,
		"addr":   addr.Address,
	})
}
// init creates the btc_addresses table if needed and aborts the program when
// the schema statement fails.
func init() {
	if _, err := DB.Handle.Exec(DBBTCAddressesSchema); err != nil {
		log.Fatal(err)
	}
}

+ 85
- 0
commands.go View File

@ -0,0 +1,85 @@
package main
import (
"git.sp4ke.com/sp4ke/hugobot/v3/export"
"git.sp4ke.com/sp4ke/hugobot/v3/feeds"
"git.sp4ke.com/sp4ke/hugobot/v3/static"
"log"
cli "gopkg.in/urfave/cli.v1"
)
// startServerCmd is the "server" (alias "s") command; it runs startServer.
var startServerCmd = cli.Command{
Name: "server",
Aliases: []string{"s"},
Usage: "Run server",
Action: startServer,
}
// exportCmdGrp is the "export" (alias "e") command group holding the posts,
// weeks and btc export subcommands.
var exportCmdGrp = cli.Command{
Name: "export",
Aliases: []string{"e"},
Usage: "Export to hugo",
Subcommands: []cli.Command{
exportPostsCmd,
exportWeeksCmd,
exportBTCAddressesCmd,
},
}
// exportBTCAddressesCmd is the "export btc" subcommand; it runs exportAddresses.
var exportBTCAddressesCmd = cli.Command{
Name: "btc",
Usage: "export bitcoin addresses",
Action: exportAddresses,
}
// exportWeeksCmd is the "export weeks" subcommand; it runs exportWeeks.
var exportWeeksCmd = cli.Command{
Name: "weeks",
Usage: "export weeks",
Action: exportWeeks,
}
// exportPostsCmd is the "export posts" subcommand; it runs exportPosts.
var exportPostsCmd = cli.Command{
Name: "posts",
Usage: "Export posts to hugo",
Action: exportPosts,
}
// startServer is the action of the "server" command; the CLI context is not
// consulted.
func startServer(_ *cli.Context) {
	server()
}
// exportPosts is the action of "export posts": it exports every stored feed
// through the Hugo exporter and then exports the static data. Any failure
// terminates the program.
func exportPosts(_ *cli.Context) {
	hugo := export.NewHugoExporter()

	feedList, err := feeds.ListFeeds()
	if err != nil {
		log.Fatal(err)
	}
	for _, feed := range feedList {
		hugo.Export(*feed)
	}

	// Export static data
	if err := static.HugoExportData(); err != nil {
		log.Fatal(err)
	}
}
// exportWeeks is the action of "export weeks"; a failed export terminates the
// program.
func exportWeeks(_ *cli.Context) {
	if err := export.ExportWeeks(); err != nil {
		log.Fatal(err)
	}
}
// exportAddresses is the action of "export btc"; a failed export terminates
// the program.
func exportAddresses(_ *cli.Context) {
	if err := export.ExportBTCAddresses(); err != nil {
		log.Fatal(err)
	}
}

+ 1
- 0
config.toml View File

@ -0,0 +1 @@
api-port = 8734

+ 58
- 0
config/config.go View File

@ -0,0 +1,58 @@
package config
import (
"log"
"path"
"github.com/fatih/structs"
)
const (
// BTCQRCodesDir is the directory name under which QR code images are stored.
BTCQRCodesDir = "qrcodes"
)
// Config holds the runtime settings of hugobot. Fields are filled by name
// through RegisterConf.
type Config struct {
WebsitePath string
GithubAccessToken string
RelBitcoinAddrContentPath string
ApiPort int
}
var (
// C is the global configuration instance, allocated in init.
C *Config
)
// HugoData returns the "data" directory below the configured website path.
func HugoData() string {
	websiteRoot := C.WebsitePath
	return path.Join(websiteRoot, "data")
}
// HugoContent returns the "content" directory below the configured website path.
func HugoContent() string {
	websiteRoot := C.WebsitePath
	return path.Join(websiteRoot, "content")
}
// RelBitcoinAddrContentPath joins the configured website path with the
// configured relative bitcoin address content path.
func RelBitcoinAddrContentPath() string {
	websiteRoot, relPath := C.WebsitePath, C.RelBitcoinAddrContentPath
	return path.Join(websiteRoot, relPath)
}
// RegisterConf stores val in the field of the global Config named conf.
// Names that do not match a Config field are ignored and yield nil; an error
// is returned when the field cannot be set to val.
func RegisterConf(conf string, val interface{}) error {
	log.Printf("Setting %#v to %#v", conf, val)

	field, known := structs.New(C).FieldOk(conf)
	if !known {
		// Conf option not registered in Config struct
		return nil
	}
	return field.Set(val)
}
func init() {
C = new(Config)
}

+ 72
- 0
db/db.go View File

@ -0,0 +1,72 @@
package db
import (
"fmt"
"log"
"net/url"
"os"
"path/filepath"
"time"
"github.com/jmoiron/sqlx"
_ "github.com/mattn/go-sqlite3"
)
const (
// DBName is the file name of the sqlite database.
DBName = "hugobot.sqlite"
// DBPragma is executed right after the database is opened.
DBPragma = ` PRAGMA foreign_keys = ON; `
// DBBasePathEnv names the environment variable holding the directory of
// the database file.
DBBasePathEnv = "HUGOBOT_DB_PATH"
)
var (
// DBOptions are appended to the DSN as query parameters.
DBOptions = map[string]string{
"_journal_mode": "WAL",
}
// DB is the shared database object, created and opened in init.
DB *Database
)
// Database wraps the sqlx handle used by the other packages.
type Database struct {
Handle *sqlx.DB
}
// Open opens the sqlite database file DBName located in the directory named
// by the DBBasePathEnv environment variable (the current directory when it
// is unset), passing DBOptions as DSN parameters, and then executes DBPragma.
//
// A failure to open terminates the program and a failing pragma panics, so
// the returned error is always nil.
func (d *Database) Open() error {
	// Get db base path
	baseDir, isSet := os.LookupEnv(DBBasePathEnv)
	if !isSet {
		baseDir = "."
	}
	dbFile := filepath.Join(baseDir, DBName)

	params := url.Values{}
	for option, value := range DBOptions {
		params.Set(option, value)
	}

	dsn := fmt.Sprintf("file:%s?%s", dbFile, params.Encode())
	log.Printf("Opening sqlite db %s\n", dsn)

	handle, err := sqlx.Open("sqlite3", dsn)
	d.Handle = handle
	if err != nil {
		log.Fatal(err)
	}

	// Execute Pragmas
	d.Handle.MustExec(DBPragma)
	return nil
}
// AutoIncr carries an integer id and a creation time with their JSON names.
type AutoIncr struct {
ID int64 `json:"id"`
Created time.Time `json:"created"`
}
// init creates the shared Database and opens it. An error returned by Open
// terminates the program instead of being discarded, so importing packages
// never run against a database that failed to open.
func init() {
	DB = &Database{}
	if err := DB.Open(); err != nil {
		log.Fatal(err)
	}
}

+ 42
- 0
docker-compose.yml View File

@ -0,0 +1,42 @@
version: "2.2"
volumes:
js-deps:
build:
sqlite-db:
services:
bot:
image: hugobot/hugobot
build: .
volumes:
- path_to_website:/website
- $PWD:/hugobot
- sqlite-db:/db
environment:
- BUILD_DIR=/build
restart: on-failure
ports:
- "8734:8734"
working_dir: /hugobot
sqlite-web:
image: hugobot/sqlite-web
build:
context: .
dockerfile: ./Dockerfile-sqliteweb
ports:
- "8080"
volumes:
- sqlite-db:/db
environment:
- SQLITE_DATABASE=hugobot.sqlite
- SQLITE_WEB_PASSWORD=hugobot

+ 15
- 0
docker-entrypoint.sh View File

@ -0,0 +1,15 @@
#!/bin/bash
# Container entrypoint: checks the mounted directories, then runs the command
# passed as arguments.
set -e

# Succeeds when `ls -A` prints nothing for the given directory.
dir_is_empty() {
    [[ -z "$(ls -A "$1")" ]]
}

# An empty database directory is only worth a warning.
if dir_is_empty "$HUGOBOT_DB_PATH"; then
    echo "WARNING !! $HUGOBOT_DB_PATH is empty, creating new database !"
fi

# An empty website directory is fatal.
if dir_is_empty "$WEBSITE_PATH"; then
    echo "you need to mount the website path !"
    exit 1
fi

exec "$@"

+ 58
- 0
encoder/encoder.go View File

@ -0,0 +1,58 @@
package encoder
import (
"encoding/json"
"fmt"
"io"
"github.com/BurntSushi/toml"
)
// Supported output encodings.
const (
JSON = iota
TOML
)
// Encoder is anything able to encode a value to its underlying destination.
type Encoder interface {
Encode(v interface{}) error
}
// ExportEncoder pairs an Encoder with the writer it targets and remembers
// which encoding (JSON or TOML) was requested.
type ExportEncoder struct {
encoder Encoder
w io.Writer
eType int
}
// Encode writes v to the underlying writer using the configured encoder.
//
// For TOML output the encoded value is wrapped between two "+++" delimiter
// lines. The first error hit while writing a delimiter or encoding v is
// returned; nothing further is written after an error.
func (ee *ExportEncoder) Encode(v interface{}) error {
	if ee.eType != TOML {
		return ee.encoder.Encode(v)
	}

	// Opening delimiter.
	if _, err := fmt.Fprintf(ee.w, "+++\n"); err != nil {
		return err
	}
	if err := ee.encoder.Encode(v); err != nil {
		return err
	}
	// Closing delimiter.
	_, err := fmt.Fprintf(ee.w, "+++\n")
	return err
}
// NewExportEncoder returns an ExportEncoder writing to w in the encoding
// selected by encType (JSON or TOML). For any other encType the inner
// encoder is left unset.
func NewExportEncoder(w io.Writer, encType int) *ExportEncoder {
	exp := &ExportEncoder{
		w:     w,
		eType: encType,
	}
	if encType == JSON {
		exp.encoder = json.NewEncoder(w)
	} else if encType == TOML {
		exp.encoder = toml.NewEncoder(w)
	}
	return exp
}

+ 65
- 0
export/btc_addresses.go View File

@ -0,0 +1,65 @@
package export
import (
"git.sp4ke.com/sp4ke/hugobot/v3/bitcoin"
"git.sp4ke.com/sp4ke/hugobot/v3/config"
"git.sp4ke.com/sp4ke/hugobot/v3/encoder"
"log"
"os"
"path/filepath"
qrcode "github.com/skip2/go-qrcode"
)
// ExportBTCAddresses exports every address that is used but not yet synced.
//
// For each address it writes a QR code image into the qrcodes directory and
// a "<address>.md" TOML front-matter page into the bitcoin address content
// path, then marks the address as synced. It stops at the first error and
// returns it; an address is only marked synced after its page has been
// encoded and its file closed without error.
func ExportBTCAddresses() error {
	pendingAddrs, err := bitcoin.GetAllUsedUnsyncedAddresses()
	if err != nil {
		return err
	}

	for _, a := range pendingAddrs {
		//first export the qr codes
		log.Println("exporting ", a)

		qrFileName := a.Address + ".png"
		qrCodePath := filepath.Join(config.RelBitcoinAddrContentPath(),
			config.BTCQRCodesDir, qrFileName)
		if err := qrcode.WriteFile(a.Address, qrcode.Medium, 580, qrCodePath); err != nil {
			return err
		}

		// store the address pages
		filename := a.Address + ".md"
		filePath := filepath.Join(config.RelBitcoinAddrContentPath(), filename)

		data := map[string]interface{}{
			"linked_article_id": a.LinkedArticleID,
			//"resources": []map[string]interface{}{
			//map[string]interface{}{
			//"src": filepath.Join(config.BTCQRCodesDir, a.Address+".png"),
			//},
			//},
		}

		addressPage, err := os.Create(filePath)
		if err != nil {
			return err
		}

		// Close the page inside the iteration so descriptors do not pile up,
		// and report the encoding error first when both fail.
		err = encoder.NewExportEncoder(addressPage, encoder.TOML).Encode(data)
		if closeErr := addressPage.Close(); err == nil {
			err = closeErr
		}
		if err != nil {
			return err
		}

		// Set synced
		if err := a.SetSynced(); err != nil {
			return err
		}
	}
	return nil
}

+ 17
- 0
export/bulletin.go View File

@ -0,0 +1,17 @@
package export
import (
"git.sp4ke.com/sp4ke/hugobot/v3/feeds"
"git.sp4ke.com/sp4ke/hugobot/v3/posts"
"strings"
)
// BulletinExport is a post mapper that sets exp["bulletin_type"] for feeds
// whose section has the form "bulletin/<type>".
//
// Sections without a second path element (for example exactly "bulletin")
// are left untouched instead of causing an index-out-of-range panic. The
// post argument is unused and the returned error is always nil.
func BulletinExport(exp Map, feed feeds.Feed, post posts.Post) error {
	parts := strings.Split(feed.Section, "/")
	if len(parts) > 1 && parts[0] == "bulletin" {
		exp["bulletin_type"] = parts[1]
	}
	return nil
}

+ 214
- 0
export/export.go View File

@ -0,0 +1,214 @@
package export
import (
"fmt"
"log"
"os"
"path/filepath"
"strings"
"time"
"git.sp4ke.com/sp4ke/hugobot/v3/config"
"git.sp4ke.com/sp4ke/hugobot/v3/encoder"
"git.sp4ke.com/sp4ke/hugobot/v3/feeds"
"git.sp4ke.com/sp4ke/hugobot/v3/filters"
"git.sp4ke.com/sp4ke/hugobot/v3/posts"
"git.sp4ke.com/sp4ke/hugobot/v3/types"
"git.sp4ke.com/sp4ke/hugobot/v3/utils"
)
// PostMappers holds the registered post mappers, in registration order.
var PostMappers []PostMapper
// FeedMappers holds the registered feed mappers, in registration order.
var FeedMappers []FeedMapper
// Map is the generic key/value document handed to the encoders.
type Map map[string]interface{}
// PostMapper may add to or change the export Map of a single post.
type PostMapper func(Map, feeds.Feed, posts.Post) error
// FeedMapper may add to or change the export Map of a feed.
type FeedMapper func(Map, feeds.Feed) error
// PostExport is the exported version of a post.
type PostExport struct {
ID int64 `json:"id"`
Title string `json:"title"`
Link string `json:"link"`
Published time.Time `json:"published"`
Content string `json:"content"`
}
// PostMap indexes exported posts by an int64 key (the post id in export).
type PostMap map[int64]Map
// FeedExport is the exported version of a feed together with its posts.
type FeedExport struct {
Name string `json:"name"`
Section string `json:"section"`
Categories types.StringList `json:"categories"`
Posts PostMap `json:"posts"`
}
// HugoExporter exports feeds and their posts into the Hugo website tree.
type HugoExporter struct{}
// Handle exports feed and returns the export error, if any.
func (he HugoExporter) Handle(feed feeds.Feed) error {
return he.export(feed)
}
// export writes all posts of feed into the Hugo website.
//
// The posts are first passed through the post filter hooks. A single JSON
// file "<feed name>.json" (with "/" replaced by "-") holding the feed and its
// posts is written below the Hugo data directory, inside the feed's section.
// When feed.ExportPosts is set, each post is additionally written as a TOML
// front-matter file "<feed name>-<short id>.md" below the Hugo content
// directory.
//
// Nothing is written when the feed has no posts. Errors from loading posts,
// creating the data directory or creating an output file are returned;
// encoding failures are logged and do not abort the export.
func (he HugoExporter) export(feed feeds.Feed) error {
	log.Printf("Exporting %s to %s", feed.Name, config.HugoData())

	feedPosts, err := posts.GetPostsByFeedId(feed.FeedID)
	if err != nil {
		return err
	}
	if len(feedPosts) == 0 {
		log.Printf("nothing to export")
		return nil
	}

	// Run filters on posts
	for _, p := range feedPosts {
		filters.RunPostFilterHooks(feed, p)
	}

	// Dir and filename
	dirPath := filepath.Join(config.HugoData(), feed.Section)
	cleanFeedName := strings.Replace(feed.Name, "/", "-", -1)
	filePath := filepath.Join(dirPath, cleanFeedName+".json")

	err = utils.Mkdir(dirPath)
	if err != nil {
		return err
	}

	feedExp := Map{
		"name":       feed.Name,
		"section":    feed.Section,
		"categories": feed.Categories,
	}
	runFeedMappers(feedExp, feed)

	postsMap := make(PostMap)
	for _, p := range feedPosts {
		exp := Map{
			"id":        p.PostID,
			"title":     p.Title,
			"link":      p.Link,
			"published": p.Published,
			"updated":   p.Updated,
			//"content": p.Content,
		}
		runPostMappers(exp, feed, *p)
		postsMap[p.PostID] = exp
	}
	feedExp["posts"] = postsMap

	if err := writeExportFile(filePath, encoder.JSON, feedExp); err != nil {
		return err
	}

	// Handle feeds which export posts individually as hugo posts
	// Like bulletin
	if !feed.ExportPosts {
		return nil
	}

	contentDir := filepath.Join(config.HugoContent(), feed.Section)
	for _, p := range feedPosts {
		exp := Map{
			"id":           p.PostID,
			"title":        p.Title,
			"name":         feed.Name,
			"author":       p.Author,
			"description":  p.PostDescription,
			"externalLink": feed.UseExternalLink,
			"display_name": feed.DisplayName,
			"publishdate":  p.Published,
			"date":         p.Updated,
			"issuedate":    utils.NextThursday(p.Updated),
			"use_data":     true,
			"slug":         p.ShortID,
			"link":         p.Link,
			// Content is written in the post
			"content":    p.Content,
			"categories": feed.Categories,
			"tags":       strings.Split(p.Tags, ","),
		}
		if feed.Publications != "" {
			exp["publications"] = strings.Split(feed.Publications, ",")
		}
		runPostMappers(exp, feed, *p)

		fileName := fmt.Sprintf("%s-%s.md", cleanFeedName, p.ShortID)
		postPath := filepath.Join(contentDir, fileName)
		if err := writeExportFile(postPath, encoder.TOML, exp); err != nil {
			return err
		}
	}
	return nil
}

// writeExportFile creates (or truncates) the file at path, encodes data into
// it with the given encoder type and closes the file before returning, so
// callers looping over many posts never accumulate open descriptors.
//
// Only a failure to create the file is returned. An encoding failure is
// logged and otherwise tolerated, keeping the export best-effort while no
// longer hiding the problem.
func writeExportFile(path string, encType int, data Map) error {
	out, err := os.Create(path)
	if err != nil {
		return err
	}
	defer out.Close()

	if err := encoder.NewExportEncoder(out, encType).Encode(data); err != nil {
		log.Printf("encoding %s: %v", path, err)
	}
	return nil
}
// Export exports feed and terminates the program when the export fails.
// (The original note says this runs in a goroutine; the caller is not
// visible here.)
func (he HugoExporter) Export(feed feeds.Feed) {
	if err := he.export(feed); err != nil {
		log.Fatal(err)
	}
}
// NewHugoExporter returns a HugoExporter after making sure the Hugo data
// directory exists; the program terminates when it cannot be created.
func NewHugoExporter() HugoExporter {
	// Make sure path exists
	if err := utils.Mkdir(config.HugoData()); err != nil {
		log.Fatal(err)
	}
	return HugoExporter{}
}
// runPostMappers applies every registered post mapper to e in registration
// order. A mapper error is logged and the remaining mappers still run.
func runPostMappers(e Map, f feeds.Feed, p posts.Post) {
	for _, mapper := range PostMappers {
		if err := mapper(e, f, p); err != nil {
			log.Print(err)
		}
	}
}
// runFeedMappers applies every registered feed mapper to e in registration
// order. A mapper error is logged and the remaining mappers still run.
func runFeedMappers(e Map, f feeds.Feed) {
	for _, mapper := range FeedMappers {
		if err := mapper(e, f); err != nil {
			log.Print(err)
		}
	}
}
// RegisterPostMapper adds mapper to the end of the post mapper list.
func RegisterPostMapper(mapper PostMapper) {
	registered := append(PostMappers, mapper)
	PostMappers = registered
}
// RegisterFeedMapper adds mapper to the end of the feed mapper list.
func RegisterFeedMapper(mapper FeedMapper) {
	registered := append(FeedMappers, mapper)
	FeedMappers = registered
}
func init() {
RegisterPostMapper(BulletinExport)
RegisterPostMapper(NewsletterPostLayout)
RegisterPostMapper(RFCExport)
RegisterPostMapper(ReleaseExport)
}

+ 18
- 0
export/newsletters.go View File

@ -0,0 +1,18 @@
package export
import (
"git.sp4ke.com/sp4ke/hugobot/v3/feeds"
"git.sp4ke.com/sp4ke/hugobot/v3/posts"
"path"
"github.com/gobuffalo/flect"
)
// NewsletterPostLayout is a post mapper that sets exp["layout"] for feeds in
// the "bulletin/newsletters" section. The layout is the last path element of
// the singularized section name. Other sections are left untouched and the
// returned error is always nil.
func NewsletterPostLayout(exp Map, feed feeds.Feed, post posts.Post) error {
	if feed.Section != "bulletin/newsletters" {
		return nil
	}
	singular := flect.Singularize(feed.Section)
	exp["layout"] = path.Base(singular)
	return nil
}

+ 14
- 0
export/release.go View File

@ -0,0 +1,14 @@
package export
import (
"git.sp4ke.com/sp4ke/hugobot/v3/feeds"
"git.sp4ke.com/sp4ke/hugobot/v3/posts"
)
// ReleaseExport is a post mapper that copies the post's JSON data into
// exp["data"] for feeds in the "bulletin/releases" section. The returned
// error is always nil.
func ReleaseExport(exp Map, feed feeds.Feed, post posts.Post) error {
	if feed.Section != "bulletin/releases" {
		return nil
	}
	exp["data"] = post.JsonData
	return nil
}

+ 15
- 0
export/rfc.go View File

@ -0,0 +1,15 @@
package export
import (
"git.sp4ke.com/sp4ke/hugobot/v3/feeds"
"git.sp4ke.com/sp4ke/hugobot/v3/posts"
)
// RFCExport is a post mapper that copies the post's JSON data into
// exp["data"] for feeds in the "bulletin/rfc" section. The returned error is
// always nil.
//
// TODO: This happened in the main export file
func RFCExport(exp Map, feed feeds.Feed, post posts.Post) error {
	if feed.Section != "bulletin/rfc" {
		return nil
	}
	exp["data"] = post.JsonData
	return nil
}

+ 56
- 0
export/weeks.go View File

@ -0,0 +1,56 @@
// Export all weeks to the weeks content directory
package export
import (
"git.sp4ke.com/sp4ke/hugobot/v3/config"
"git.sp4ke.com/sp4ke/hugobot/v3/encoder"
"git.sp4ke.com/sp4ke/hugobot/v3/utils"
"os"
"path/filepath"
"time"
)
const (
// FirstWeek is the date (YYYY-MM-DD) from which weeks are exported.
FirstWeek = "2017-12-07"
)
var (
// WeeksContentDir is the directory, below the Hugo content directory,
// that receives the week files.
WeeksContentDir = "weeks"
)
// WeekData is the front matter written for each exported week.
type WeekData struct {
Title string
Date time.Time
}
// ExportWeeks writes one "<YYYY-MM-DD>.md" TOML front-matter file into the
// weeks content directory for every Thursday from FirstWeek until now.
//
// Each file is closed as soon as it has been written. The first error from
// parsing FirstWeek, creating a file, encoding its data or closing it is
// returned.
func ExportWeeks() error {
	firstWeek, err := time.Parse("2006-01-02", FirstWeek)
	if err != nil {
		return err
	}

	weeksTilNow := utils.GetAllThursdays(firstWeek, time.Now())
	for _, week := range weeksTilNow {
		weekName := week.Format("2006-01-02")
		fileName := weekName + ".md"

		weekFile, err := os.Create(filepath.Join(config.HugoContent(),
			WeeksContentDir,
			fileName))
		if err != nil {
			return err
		}

		weekData := WeekData{
			Title: weekName,
			Date:  week,
		}

		// Close inside the iteration so descriptors do not pile up, and
		// report the encoding error first when both fail.
		err = encoder.NewExportEncoder(weekFile, encoder.TOML).Encode(weekData)
		if closeErr := weekFile.Close(); err == nil {
			err = closeErr
		}
		if err != nil {
			return err
		}
	}
	return nil
}

+ 112
- 0
feed_commands.go View File

@ -0,0 +1,112 @@
package main
import (
"git.sp4ke.com/sp4ke/hugobot/v3/feeds"
"git.sp4ke.com/sp4ke/hugobot/v3/handlers"
"git.sp4ke.com/sp4ke/hugobot/v3/posts"
"fmt"
"log"
"time"
cli "gopkg.in/urfave/cli.v1"
)
// fetchCmd is the "fetch" (alias "f") subcommand; it runs fetchFeeds and
// accepts an optional --since time.
var fetchCmd = cli.Command{
Name: "fetch",
Aliases: []string{"f"},
Usage: "Fetch data from feed",
Flags: []cli.Flag{
cli.StringFlag{
Name: "since",
Usage: "Fetch data since `TIME`, defaults to last refresh time",
},
},
Action: fetchFeeds,
}
// feedsCmdGroup is the "feeds" command group. Without a subcommand it runs
// listFeeds; the --id/-i flag selects a single feed.
var feedsCmdGroup = cli.Command{
Name: "feeds",
Usage: "Feeds related commands. default: list feeds",
Flags: []cli.Flag{
cli.IntFlag{
Name: "id,i",
Value: 0,
Usage: "Feeds `id`",
},
},
Subcommands: []cli.Command{
fetchCmd,
},
Action: listFeeds,
}
// fetchFeeds is the action of "feeds fetch". For every selected feed (see
// getFeeds, evaluated on the parent context so the --id flag applies) it
// fetches the posts newer than the --since time, or newer than the feed's
// last refresh when --since is not given, and logs their titles and count.
//
// --since must be in time.UnixDate format. Any error, including a fetch
// error when --since is set, terminates the program.
func fetchFeeds(c *cli.Context) {
	var result []*posts.Post

	fList, err := getFeeds(c.Parent())
	if err != nil {
		log.Fatal(err)
	}

	for _, f := range fList {
		var handler handlers.FormatHandler
		handler = handlers.GetFormatHandler(*f)

		since := f.LastRefresh
		if c.IsSet("since") {
			// Parse time. Assign with "=" so err is not shadowed; the fetch
			// error below must reach the check that follows it.
			since, err = time.Parse(time.UnixDate, c.String("since"))
			if err != nil {
				log.Fatal(err)
			}
		}

		result, err = handler.FetchSince(f.Url, since)
		if err != nil {
			log.Fatal(err)
		}

		for _, post := range result {
			log.Printf("%s (updated: %s)", post.Title, post.Updated)
		}
		log.Println("Total: ", len(result))
	}
}
// listFeeds is the action of "feeds": it prints every selected feed on its
// own line and terminates the program when the feeds cannot be loaded.
func listFeeds(c *cli.Context) {
	selected, err := getFeeds(c)
	if err != nil {
		log.Fatal(err)
	}
	for i := range selected {
		fmt.Println(selected[i])
	}
}
// getFeeds returns the feed selected by the "id" flag when it is set on c,
// and all stored feeds otherwise.
func getFeeds(c *cli.Context) ([]*feeds.Feed, error) {
	if !c.IsSet("id") {
		all, err := feeds.ListFeeds()
		if err != nil {
			return nil, err
		}
		return all, nil
	}

	single, err := feeds.GetById(c.Int64("id"))
	if err != nil {
		return nil, err
	}
	return []*feeds.Feed{single}, nil
}

+ 117
- 0
feeds/ctrl.go View File

@ -0,0 +1,117 @@
package feeds
import (
"git.sp4ke.com/sp4ke/hugobot/v3/types"
"log"
"net/http"
"strconv"
"github.com/gin-gonic/gin"
sqlite3 "github.com/mattn/go-sqlite3"
)
const (
// MsgOK is the message sent with successful responses.
MsgOK = "OK"
)
var (
// ErrNotInt is the error text sent when a path parameter is not an integer.
ErrNotInt = "expected int"
)
// FeedCtrl groups the gin handlers of the feeds API.
type FeedCtrl struct{}
// Create is the gin handler that stores a new feed from a JSON body bound to
// FeedForm. It answers 406 with "invalid form" when binding fails, 406 with
// the error text when the feed cannot be written, and 200 with MsgOK
// otherwise.
func (ctrl FeedCtrl) Create(c *gin.Context) {
	var form FeedForm
	if bindErr := c.ShouldBindJSON(&form); bindErr != nil {
		c.JSON(http.StatusNotAcceptable, gin.H{
			"status":  http.StatusNotAcceptable,
			"message": "invalid form",
			"form":    form})
		c.Abort()
		return
	}

	// Copy the submitted fields onto a fresh model.
	feed := &Feed{
		Name:       form.Name,
		Url:        form.Url,
		Format:     form.Format,
		Section:    form.Section,
		Categories: types.StringList(form.Categories),
	}

	if writeErr := feed.Write(); writeErr != nil {
		log.Println(writeErr)
		c.JSON(http.StatusNotAcceptable,
			gin.H{"status": http.StatusNotAcceptable, "error": writeErr.Error()})
		c.Abort()
		return
	}

	c.JSON(http.StatusOK, gin.H{"status": http.StatusOK, "message": MsgOK})
}
// List is the gin handler that returns all feeds under "result" with status
// 200, or answers 406 with the error text when they cannot be loaded.
func (ctrl FeedCtrl) List(c *gin.Context) {
	all, err := ListFeeds()
	if err == nil {
		c.JSON(http.StatusOK, gin.H{"status": http.StatusOK, "result": all})
		return
	}

	c.JSON(http.StatusNotAcceptable, gin.H{
		"error":  err.Error(),
		"status": http.StatusNotAcceptable,
	})
	c.Abort()
}
// Delete is the gin handler that removes the feed named by the ":id" path
// parameter. It answers 406 when the id is not an integer, 404 when the feed
// does not exist, 500 for any other failure (sqlite errors included), and
// 200 with MsgOK on success.
func (ctrl FeedCtrl) Delete(c *gin.Context) {
	id, convErr := strconv.Atoi(c.Param("id"))
	if convErr != nil {
		c.JSON(http.StatusNotAcceptable, gin.H{
			"error":  ErrNotInt,
			"status": http.StatusNotAcceptable,
		})
		c.Abort()
		return
	}

	err := DeleteById(id)
	if err == nil {
		c.JSON(http.StatusOK, gin.H{"status": http.StatusOK, "message": MsgOK})
		return
	}

	// Everything is an internal error except a missing feed that was not
	// reported by sqlite itself.
	status := http.StatusInternalServerError
	message := err.Error()
	if sqlErr, isSqlErr := err.(sqlite3.Error); isSqlErr {
		message = sqlErr.Error()
	} else if err == ErrDoesNotExist {
		status = http.StatusNotFound
	}

	c.JSON(status, gin.H{"error": message, "status": status})
	c.Abort()
}

+ 262
- 0
feeds/feed.go View File

@ -0,0 +1,262 @@
package feeds
import (
"git.sp4ke.com/sp4ke/hugobot/v3/db"
"git.sp4ke.com/sp4ke/hugobot/v3/types"
"errors"
"log"
"time"
sqlite3 "github.com/mattn/go-sqlite3"
)
// Example query joining feeds with their format names:
//sqlite> SELECT feeds.name, url, feed_formats.format_name from feeds JOIN feed_formats ON feeds.format = feed_formats.id;
//
// DB is this package's reference to the shared database object from package db.
var DB = db.DB
// Table definitions; both statements are executed by init.
const (
// DBFeedSchema creates the feeds table when it is missing.
DBFeedSchema = `CREATE TABLE IF NOT EXISTS feeds (
feed_id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE,
display_name TEXT DEFAULT '',
publications TEXT DEFAULT '',
section TEXT DEFAULT '',
categories TEXT DEFAULT '',
description TEXT DEFAULT '',
url TEXT NOT NULL,
export_posts INTEGER DEFAULT 0,
last_refresh timestamp DEFAULT -1,
created timestamp DEFAULT (strftime('%s')),
interval INTEGER DEFAULT 60,
format INTEGER NOT NULL DEFAULT 0,
serial_run INTEGER DEFAULT 0,
use_external_link INTEGER DEFAULT 0,
FOREIGN KEY (format) REFERENCES feed_formats(id)
)`
// DBFeedFormatsSchema creates the feed_formats lookup table when it is missing.
DBFeedFormatsSchema = `CREATE TABLE IF NOT EXISTS feed_formats (
id INTEGER PRIMARY KEY,
format_name TEXT NOT NULL UNIQUE
)`
)
// Queries on the feeds table. Each takes one positional parameter, except
// QListFeeds which takes none.
const (
QDeleteFeedById = `DELETE FROM feeds WHERE feed_id = ?`
QGetFeed = `SELECT * FROM feeds WHERE feed_id = ?`
QGetFeedByName = `SELECT * FROM feeds WHERE name = ?`
QGetFeedByURL = `SELECT * FROM feeds WHERE url = ?`
// QListFeeds selects every feed joined with the name of its format.
QListFeeds = `SELECT
feeds.feed_id,
feeds.name,
feeds.display_name,
feeds.publications,
feeds.section,
feeds.categories,
feeds.description,
feeds.url,
feeds.last_refresh,
feeds.created,
feeds.format,
feeds.serial_run,
feeds.use_external_link,
feeds.interval,
feeds.export_posts,
feed_formats.format_name
FROM feeds
JOIN feed_formats ON feeds.format = feed_formats.id`
)
// Sentinel errors returned by the feed functions.
var (
ErrDoesNotExist = errors.New("does not exist")
ErrAlreadyExists = errors.New("already exists")
)
// FeedFormat identifies the format of a feed; init stores its values as ids
// in the feed_formats table.
type FeedFormat int
// Feed Formats
const (
FormatRSS FeedFormat = iota
FormatHTML
FormatJSON
FormatTweet
FormatRFC
FormatGHRelease
)
// FeedFormats maps each format to the name stored in feed_formats.format_name.
var FeedFormats = map[FeedFormat]string{
FormatRSS: "RSS",
FormatHTML: "HTML",
FormatJSON: "JSON",
FormatTweet: "TWEET",
FormatRFC: "RFC",
FormatGHRelease: "GithubRelease",
}
// Feed is a content source polled by hugobot. The json tags define its API
// representation and the db tags name the matching columns where given.
type Feed struct {
FeedID int64 `json:"id" db:"feed_id"`
Name string `json:"name" db:"name"`
Section string `json:"section,omitempty"`
Categories types.StringList `json:"categories,omitempty"`
Description string `json:"description"`
Url string `json:"url"`
Format FeedFormat `json:"-"`
// FormatString is the format name: read from feed_formats.format_name by
// QListFeeds, or filled from FeedFormats by the GetBy* functions.
FormatString string `json:"format" db:"format_name"`
LastRefresh time.Time `db:"last_refresh" json:"last_refresh"` // timestamp time.Unix()
Created time.Time `json:"created"`
DisplayName string `db:"display_name"`
// Publications is split on "," when posts are exported.
Publications string `json:"-"`
// This feed's posts should also be exported individually
ExportPosts bool `json:"export_posts" db:"export_posts"`
// Time in seconds between each polling job on the news feed
Interval float64 `json:"refresh_interval"`
Serial bool `json:"serial" db:"serial_run"` // Jobs for this feed should run in series
// Items which only contain summaries and redirect to external content
// like publications and newsletters
UseExternalLink bool `json:"use_external_link" db:"use_external_link"`
}
// Write inserts the feed's name, section, categories, url and format as a
// new row of the feeds table. A sqlite constraint violation is reported as
// ErrAlreadyExists; any other error is returned unchanged.
func (f *Feed) Write() error {
	const insertStmt = `INSERT INTO feeds
(name, section, categories, url, format)
VALUES(:name, :section, :categories, :url, :format)`

	_, err := DB.Handle.NamedExec(insertStmt, f)
	if sqlErr, isSqlErr := err.(sqlite3.Error); isSqlErr && sqlErr.Code == sqlite3.ErrConstraint {
		return ErrAlreadyExists
	}
	return err
}
// UpdateRefreshTime records time as the feed's last refresh, both on the
// receiver and in its feeds row. It returns the database error, if any.
func (f *Feed) UpdateRefreshTime(time time.Time) error {
	f.LastRefresh = time

	_, err := DB.Handle.Exec(
		`UPDATE feeds SET last_refresh = ? WHERE feed_id = ?`,
		f.LastRefresh, f.FeedID,
	)
	return err
}
// GetById loads the feed with the given feed_id and fills FormatString from
// FeedFormats. The query error is returned unchanged on failure.
func GetById(id int64) (*Feed, error) {
	found := new(Feed)
	if err := DB.Handle.Get(found, QGetFeed, id); err != nil {
		return nil, err
	}
	found.FormatString = FeedFormats[found.Format]
	return found, nil
}
// GetByName loads the feed with the given name and fills FormatString from
// FeedFormats. The query error is returned unchanged on failure.
func GetByName(name string) (*Feed, error) {
	found := new(Feed)
	if err := DB.Handle.Get(found, QGetFeedByName, name); err != nil {
		return nil, err
	}
	found.FormatString = FeedFormats[found.Format]
	return found, nil
}
// GetByURL loads the feed with the given url and fills FormatString from
// FeedFormats. The query error is returned unchanged on failure.
func GetByURL(url string) (*Feed, error) {
	found := new(Feed)
	if err := DB.Handle.Get(found, QGetFeedByURL, url); err != nil {
		return nil, err
	}
	found.FormatString = FeedFormats[found.Format]
	return found, nil
}
// ListFeeds returns every feed together with its format name (QListFeeds).
func ListFeeds() ([]*Feed, error) {
	var all []*Feed
	if err := DB.Handle.Select(&all, QListFeeds); err != nil {
		return nil, err
	}
	return all, nil
}
// DeleteById removes the feed with the given id. Any failure to load the
// feed beforehand is reported as ErrDoesNotExist; an error from the delete
// statement itself is returned unchanged.
func DeleteById(id int) error {
	// If id does not exists return warning
	var existing Feed
	if lookupErr := DB.Handle.Get(&existing, QGetFeed, id); lookupErr != nil {
		return ErrDoesNotExist
	}

	if _, delErr := DB.Handle.Exec(QDeleteFeedById, id); delErr != nil {
		return delErr
	}
	return nil
}
// ShouldRefresh returns the number of seconds elapsed since the feed's last
// refresh and whether that is at least the feed's Interval.
func (feed *Feed) ShouldRefresh() (float64, bool) {
	elapsed := time.Since(feed.LastRefresh).Seconds()
	//log.Printf("refresh %s in %.0f seconds", feed.Name, feed.Interval-elapsed)
	due := elapsed >= feed.Interval
	return elapsed, due
}
func init() {
_, err := DB.Handle.Exec(DBFeedSchema)
if err != nil {
log.Fatal(err)
}
_, err = DB.Handle.Exec(DBFeedFormatsSchema)
if err != nil {
log.Fatal(err)
}
// Populate feed formats
query := `INSERT INTO feed_formats (id, format_name) VALUES (?, ?)`
for k, v := range FeedFormats {
_, err := DB.Handle.Exec(query, k, v)
if err != nil {
sqlErr, ok := err.(sqlite3.Error)
if ok && sqlErr.ExtendedCode == sqlite3.ErrConstraintUnique {
log.Panic(err)
}
if !ok {
log.Panic(err)
}
}
}
}

+ 9
- 0
feeds/form.go View File

@ -0,0 +1,9 @@
package feeds
// FeedForm is the payload accepted when creating a feed; Name and Url are
// required by the binding rules.
// NOTE(review): the fields carry `form` tags while FeedCtrl.Create binds the
// body as JSON — confirm the intended key names.
type FeedForm struct {
Name string `form:"name" binding:"required"`
Url string `form:"url" binding:"required"`
Format FeedFormat `form:"format"`
Categories []string `form:"categories"`
Section string `form:"section"`
}

+ 26
- 0
filters/filters.go View File

@ -0,0 +1,26 @@
package filters
import (
"git.sp4ke.com/sp4ke/hugobot/v3/feeds"
"git.sp4ke.com/sp4ke/hugobot/v3/posts"
"log"
)
// FilterHook inspects or modifies a post of the given feed in place.
type FilterHook func(feed feeds.Feed, post *posts.Post) error
var (
// PostFilters holds the registered hooks, in registration order.
PostFilters []FilterHook
)
// RegisterPostFilterHook adds hook to the end of the post filter list.
func RegisterPostFilterHook(hook FilterHook) {
	registered := append(PostFilters, hook)
	PostFilters = registered
}
// RunPostFilterHooks applies every registered hook to post in registration
// order. The first hook error terminates the program.
func RunPostFilterHooks(feed feeds.Feed, post *posts.Post) {
	for _, hook := range PostFilters {
		if err := hook(feed, post); err != nil {
			log.Fatal(err)
		}
	}
}

+ 65
- 0
filters/mailchimp_filter.go View File

@ -0,0 +1,65 @@
package filters
import (
"git.sp4ke.com/sp4ke/hugobot/v3/feeds"
"git.sp4ke.com/sp4ke/hugobot/v3/posts"
"strings"
"github.com/PuerkitoBio/goquery"
)
const (
// PreviewTextSel is the CSS selector of the preview text element.
PreviewTextSel = ".mcnPreviewText"
)
var (
// RemoveSelectors lists the CSS selectors whose matches mailChimpFilter
// removes from the post content.
RemoveSelectors = []string{"style", ".footerContainer", "#awesomewrap", "#templatePreheader", "img", "head"}
)
// mailChimpFilter cleans the HTML content of a post in place.
//
// A post whose description and content are both empty is left untouched. A
// description identical to the content is cleared. The content is then parsed
// as HTML, every element matching RemoveSelectors is removed, and the
// result is stored back into post.Content. Parsing and rendering errors are
// returned.
func mailChimpFilter(feed feeds.Feed, post *posts.Post) error {
	if post.PostDescription == post.Content {
		// Nothing to do for empty content
		if post.Content == "" {
			return nil
		}
		// Same content in both
		post.PostDescription = ""
	}

	doc, err := goquery.NewDocumentFromReader(strings.NewReader(post.Content))
	if err != nil {
		return err
	}

	unwanted := strings.Join(RemoveSelectors, ",")
	doc.Find(unwanted).Remove()

	post.Content, err = doc.Html()
	return err
}
func extractPreviewText(feed feeds.Feed, post *posts.Post) error {
// Ignore filled description
if post.PostDescription != "" {
return nil