ncdumpzone: Add mode for a URL list.

This can be used as input for a YaCy crawl job.
pull/93/head
JeremyRand 5 years ago
parent 3b7ffcbc45
commit 39fbbc0ec5
No known key found for this signature in database
GPG Key ID: B3F2D165786D6570

@ -11,6 +11,7 @@ import (
extratypes "github.com/hlandau/ncbtcjsontypes"
"github.com/namecoin/ncdns/namecoin"
"github.com/namecoin/ncdns/ncdomain"
"github.com/namecoin/ncdns/rrtourl"
"github.com/namecoin/ncdns/tlsoverridefirefox"
"github.com/namecoin/ncdns/util"
)
@ -29,6 +30,12 @@ func dumpRR(rr dns.RR, dest io.Writer, format string) error {
return err
}
fmt.Fprint(dest, result)
case "url-list":
result, err := rrtourl.URLsFromRR(rr)
if err != nil {
return err
}
fmt.Fprint(dest, result)
}
return nil
@ -77,7 +84,8 @@ func dumpName(item *extratypes.NameFilterItem, conn namecoin.Conn,
// Dump extracts all domain names from conn, formats them according to the
// specified format, and writes the result to dest.
func Dump(conn namecoin.Conn, dest io.Writer, format string) error {
if format != "zonefile" && format != "firefox-override" {
if format != "zonefile" && format != "firefox-override" &&
format != "url-list" {
return fmt.Errorf("Invalid \"format\" argument: %s", format)
}

@ -23,7 +23,8 @@ var (
"Namecoin RPC password")
formatFlag = cflag.String(flagGroup, "format", "zonefile", "Output "+
"format. \"zonefile\" = DNS zone file. "+
"\"firefox-override\" = Firefox cert_override.txt format.")
"\"firefox-override\" = Firefox cert_override.txt format. "+
"\"url-list\" = URL list.")
)
var conn namecoin.Conn

@ -0,0 +1,41 @@
package rrtourl
import (
"fmt"
"strings"
"github.com/miekg/dns"
"github.com/namecoin/ncdns/util"
)
// URLsFromRR returns a newline-terminated list of URLs derived from the owner
// name of rr, which is suitable for passing to a search engine crawler like
// YaCy.
//
// Leading "_"-prefixed labels (such as "_443._tcp" on TLSA records), the
// trailing period of the FQDN, and a leading wildcard label "*." are stripped
// from the owner name first. For the resulting host, one URL per line is
// emitted for each of the http, https, ftp and ftps schemes, both for the bare
// host and for its "www." subdomain.
//
// If no such list can be derived (nothing is left of the owner name after
// stripping), it returns an empty string and a nil error. An error is returned
// if rr or its header is nil.
func URLsFromRR(rr dns.RR) (string, error) {
	// Calling a method on a nil interface value would panic; report it as an
	// error instead.
	if rr == nil {
		return "", fmt.Errorf("Nil RR")
	}

	header := rr.Header()
	if header == nil {
		return "", fmt.Errorf("Nil RR header")
	}

	hostFQDN := header.Name

	// Remove things like "_443._tcp" in TLSA records. Each iteration keeps
	// only the second value returned by util.SplitDomainTail (presumably the
	// name without its leading label -- confirm against the util package).
	for strings.HasPrefix(hostFQDN, "_") {
		_, hostFQDN = util.SplitDomainTail(hostFQDN)
	}

	// Remove the trailing period from FQDNs.
	host := strings.TrimSuffix(hostFQDN, ".")

	// Remove wildcard subdomains (below we assume that they might be "www.").
	host = strings.TrimPrefix(host, "*.")

	// Nothing usable is left (e.g. the root name): emitting "http:///" and
	// friends would only feed garbage to the crawler, so return the empty
	// list promised by the doc comment.
	if host == "" {
		return "", nil
	}

	var b strings.Builder
	for _, scheme := range []string{"http", "https", "ftp", "ftps"} {
		for _, prefix := range []string{"", "www."} {
			b.WriteString(scheme + "://" + prefix + host + "/\n")
		}
	}

	return b.String(), nil
}
Loading…
Cancel
Save