mirror of https://github.com/namecoin/ncdns
ncdumpzone: Add mode for a URL list.
This can be used as input for a YaCy crawl job.
pull/93/head
parent
3b7ffcbc45
commit
39fbbc0ec5
@ -0,0 +1,41 @@
|
|||||||
|
package rrtourl
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/miekg/dns"
|
||||||
|
"github.com/namecoin/ncdns/util"
|
||||||
|
)
|
||||||
|
|
||||||
|
// URLsFromRR returns a list of URL's derived from rr, which is suitable for
|
||||||
|
// passing to a search engine crawler like YaCy. If no such list can be
|
||||||
|
// derived, returns an empty string.
|
||||||
|
func URLsFromRR(rr dns.RR) (string, error) {
|
||||||
|
header := rr.Header()
|
||||||
|
if header == nil {
|
||||||
|
return "", fmt.Errorf("Nil RR header")
|
||||||
|
}
|
||||||
|
|
||||||
|
hostFQDN := header.Name
|
||||||
|
|
||||||
|
// Remove things like "_443._tcp" in TLSA records
|
||||||
|
for strings.HasPrefix(hostFQDN, "_") {
|
||||||
|
_, hostFQDN = util.SplitDomainTail(hostFQDN)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove the trailing period from FQDN's
|
||||||
|
host := strings.TrimSuffix(hostFQDN, ".")
|
||||||
|
|
||||||
|
// Remove wildcard subdomains (later we assume that they might be "www.")
|
||||||
|
host = strings.TrimPrefix(host, "*.")
|
||||||
|
|
||||||
|
return "http://" + host + "/" + "\n" +
|
||||||
|
"http://www." + host + "/" + "\n" +
|
||||||
|
"https://" + host + "/" + "\n" +
|
||||||
|
"https://www." + host + "/" + "\n" +
|
||||||
|
"ftp://" + host + "/" + "\n" +
|
||||||
|
"ftp://www." + host + "/" + "\n" +
|
||||||
|
"ftps://" + host + "/" + "\n" +
|
||||||
|
"ftps://www." + host + "/" + "\n", nil
|
||||||
|
}
|
Loading…
Reference in New Issue