ncdumpzone: Add mode for a URL list.

This can be used as input for a YaCy crawl job.
pull/93/head
JeremyRand 5 years ago
parent 3b7ffcbc45
commit 39fbbc0ec5
No known key found for this signature in database
GPG Key ID: B3F2D165786D6570

@ -11,6 +11,7 @@ import (
extratypes "github.com/hlandau/ncbtcjsontypes" extratypes "github.com/hlandau/ncbtcjsontypes"
"github.com/namecoin/ncdns/namecoin" "github.com/namecoin/ncdns/namecoin"
"github.com/namecoin/ncdns/ncdomain" "github.com/namecoin/ncdns/ncdomain"
"github.com/namecoin/ncdns/rrtourl"
"github.com/namecoin/ncdns/tlsoverridefirefox" "github.com/namecoin/ncdns/tlsoverridefirefox"
"github.com/namecoin/ncdns/util" "github.com/namecoin/ncdns/util"
) )
@ -29,6 +30,12 @@ func dumpRR(rr dns.RR, dest io.Writer, format string) error {
return err return err
} }
fmt.Fprint(dest, result) fmt.Fprint(dest, result)
case "url-list":
result, err := rrtourl.URLsFromRR(rr)
if err != nil {
return err
}
fmt.Fprint(dest, result)
} }
return nil return nil
@ -77,7 +84,8 @@ func dumpName(item *extratypes.NameFilterItem, conn namecoin.Conn,
// Dump extracts all domain names from conn, formats them according to the // Dump extracts all domain names from conn, formats them according to the
// specified format, and writes the result to dest. // specified format, and writes the result to dest.
func Dump(conn namecoin.Conn, dest io.Writer, format string) error { func Dump(conn namecoin.Conn, dest io.Writer, format string) error {
if format != "zonefile" && format != "firefox-override" { if format != "zonefile" && format != "firefox-override" &&
format != "url-list" {
return fmt.Errorf("Invalid \"format\" argument: %s", format) return fmt.Errorf("Invalid \"format\" argument: %s", format)
} }

@ -23,7 +23,8 @@ var (
"Namecoin RPC password") "Namecoin RPC password")
formatFlag = cflag.String(flagGroup, "format", "zonefile", "Output "+ formatFlag = cflag.String(flagGroup, "format", "zonefile", "Output "+
"format. \"zonefile\" = DNS zone file. "+ "format. \"zonefile\" = DNS zone file. "+
"\"firefox-override\" = Firefox cert_override.txt format.") "\"firefox-override\" = Firefox cert_override.txt format. "+
"\"url-list\" = URL list.")
) )
var conn namecoin.Conn var conn namecoin.Conn

@ -0,0 +1,41 @@
package rrtourl
import (
"fmt"
"strings"
"github.com/miekg/dns"
"github.com/namecoin/ncdns/util"
)
// URLsFromRR returns a newline-terminated list of URLs derived from the owner
// name of rr, which is suitable for passing to a search engine crawler like
// YaCy. For each derived host, the list contains the http, https, ftp and
// ftps URLs of both the host itself and its "www." subdomain.
//
// If no such list can be derived (the owner name reduces to an empty host,
// e.g. the DNS root), URLsFromRR returns an empty string and a nil error.
// A nil rr or a nil RR header yields a non-nil error.
func URLsFromRR(rr dns.RR) (string, error) {
	// Guard against a nil interface; calling Header() on it would panic.
	if rr == nil {
		return "", fmt.Errorf("Nil RR")
	}

	header := rr.Header()
	if header == nil {
		return "", fmt.Errorf("Nil RR header")
	}

	hostFQDN := header.Name

	// Remove things like "_443._tcp" in TLSA records. Each pass keeps only
	// the second value returned by util.SplitDomainTail (presumably the name
	// with its leftmost label dropped -- confirm against the util package).
	for strings.HasPrefix(hostFQDN, "_") {
		_, hostFQDN = util.SplitDomainTail(hostFQDN)
	}

	// Remove the trailing period from FQDNs.
	host := strings.TrimSuffix(hostFQDN, ".")

	// Remove wildcard subdomains (below we assume that they might be "www.").
	host = strings.TrimPrefix(host, "*.")

	// Nothing usable is left (root name, only underscore labels, or a bare
	// wildcard): honour the documented contract instead of emitting
	// malformed URLs such as "http:///".
	if host == "" || host == "*" {
		return "", nil
	}

	schemes := []string{"http", "https", "ftp", "ftps"}
	prefixes := []string{"", "www."}

	var urls strings.Builder
	for _, scheme := range schemes {
		for _, prefix := range prefixes {
			urls.WriteString(scheme)
			urls.WriteString("://")
			urls.WriteString(prefix)
			urls.WriteString(host)
			urls.WriteString("/\n")
		}
	}

	return urls.String(), nil
}
Loading…
Cancel
Save