From 39fbbc0ec57ffcee4b7d825e88bb010e741709b9 Mon Sep 17 00:00:00 2001
From: JeremyRand
Date: Wed, 27 Feb 2019 22:20:09 +0000
Subject: [PATCH] ncdumpzone: Add mode for a URL list.

This can be used as input for a YaCy crawl job.
---
 ncdumpzone/ncdumpzone.go            | 10 ++++++-
 ncdumpzone/ncdumpzone/ncdumpzone.go |  3 +-
 rrtourl/rrtourl.go                  | 60 +++++++++++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+), 2 deletions(-)
 create mode 100644 rrtourl/rrtourl.go

diff --git a/ncdumpzone/ncdumpzone.go b/ncdumpzone/ncdumpzone.go
index 94a1748..d70ac26 100644
--- a/ncdumpzone/ncdumpzone.go
+++ b/ncdumpzone/ncdumpzone.go
@@ -11,6 +11,7 @@ import (
 	extratypes "github.com/hlandau/ncbtcjsontypes"
 	"github.com/namecoin/ncdns/namecoin"
 	"github.com/namecoin/ncdns/ncdomain"
+	"github.com/namecoin/ncdns/rrtourl"
 	"github.com/namecoin/ncdns/tlsoverridefirefox"
 	"github.com/namecoin/ncdns/util"
 )
@@ -29,6 +30,12 @@ func dumpRR(rr dns.RR, dest io.Writer, format string) error {
 			return err
 		}
 		fmt.Fprint(dest, result)
+	case "url-list":
+		result, err := rrtourl.URLsFromRR(rr)
+		if err != nil {
+			return err
+		}
+		fmt.Fprint(dest, result)
 	}
 
 	return nil
@@ -77,7 +84,8 @@ func dumpName(item *extratypes.NameFilterItem, conn namecoin.Conn,
 // Dump extracts all domain names from conn, formats them according to the
 // specified format, and writes the result to dest.
 func Dump(conn namecoin.Conn, dest io.Writer, format string) error {
-	if format != "zonefile" && format != "firefox-override" {
+	if format != "zonefile" && format != "firefox-override" &&
+		format != "url-list" {
 		return fmt.Errorf("Invalid \"format\" argument: %s", format)
 	}
 
diff --git a/ncdumpzone/ncdumpzone/ncdumpzone.go b/ncdumpzone/ncdumpzone/ncdumpzone.go
index 839d2fe..99fadee 100644
--- a/ncdumpzone/ncdumpzone/ncdumpzone.go
+++ b/ncdumpzone/ncdumpzone/ncdumpzone.go
@@ -23,7 +23,8 @@ var (
 		"Namecoin RPC password")
 	formatFlag = cflag.String(flagGroup, "format", "zonefile", "Output "+
 		"format. \"zonefile\" = DNS zone file. "+
-		"\"firefox-override\" = Firefox cert_override.txt format.")
+		"\"firefox-override\" = Firefox cert_override.txt format. "+
+		"\"url-list\" = URL list.")
 )
 
 var conn namecoin.Conn
diff --git a/rrtourl/rrtourl.go b/rrtourl/rrtourl.go
new file mode 100644
index 0000000..6fa11bd
--- /dev/null
+++ b/rrtourl/rrtourl.go
@@ -0,0 +1,60 @@
+// Package rrtourl derives lists of URLs from DNS resource records.
+package rrtourl
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/miekg/dns"
+	"github.com/namecoin/ncdns/util"
+)
+
+// urlSchemes lists the schemes emitted for each host, in output order.
+var urlSchemes = [...]string{"http", "https", "ftp", "ftps"}
+
+// URLsFromRR returns a newline-delimited list of URLs derived from the owner
+// name of rr, which is suitable for passing to a search engine crawler like
+// YaCy. For each scheme, one URL is emitted for the bare host and one for its
+// "www." subdomain. If no such list can be derived (nothing usable is left of
+// the name after stripping), returns an empty string. Returns an error if rr
+// or its header is nil.
+func URLsFromRR(rr dns.RR) (string, error) {
+	if rr == nil {
+		return "", fmt.Errorf("Nil RR")
+	}
+
+	header := rr.Header()
+	if header == nil {
+		return "", fmt.Errorf("Nil RR header")
+	}
+
+	hostFQDN := header.Name
+
+	// Remove things like "_443._tcp" in TLSA records. This relies on
+	// util.SplitDomainTail returning the remainder of the name after its
+	// first label as the second result (TODO confirm against util).
+	for strings.HasPrefix(hostFQDN, "_") {
+		_, hostFQDN = util.SplitDomainTail(hostFQDN)
+	}
+
+	// Remove the trailing period from FQDNs
+	host := strings.TrimSuffix(hostFQDN, ".")
+
+	// Remove wildcard subdomains (later we assume that they might be "www.")
+	host = strings.TrimPrefix(host, "*.")
+
+	// The documented contract is an empty string when no URLs can be derived;
+	// without this guard an empty or bare-wildcard name would yield bogus
+	// entries such as "http:///".
+	if host == "" || host == "*" {
+		return "", nil
+	}
+
+	var urls string
+	for _, scheme := range urlSchemes {
+		urls += scheme + "://" + host + "/\n" +
+			scheme + "://www." + host + "/\n"
+	}
+
+	return urls, nil
+}