Clean IDs and classes from output.

pull/408/head
Cameron McCormack 7 years ago committed by Gijs
parent 19b9f9de14
commit 6729538c77

@ -35,11 +35,12 @@ function Readability(uri, doc, options) {
this._articleByline = null;
this._articleDir = null;
// Configureable options
// Configurable options
this._debug = !!options.debug;
this._maxElemsToParse = options.maxElemsToParse || this.DEFAULT_MAX_ELEMS_TO_PARSE;
this._nbTopCandidates = options.nbTopCandidates || this.DEFAULT_N_TOP_CANDIDATES;
this._wordThreshold = options.wordThreshold || this.DEFAULT_WORD_THRESHOLD;
this._classesToPreserve = this.CLASSES_TO_PRESERVE.concat(options.classesToPreserve || []);
// Start with all flags set
this._flags = this.FLAG_STRIP_UNLIKELYS |
@ -123,6 +124,10 @@ Readability.prototype = {
DEPRECATED_SIZE_ATTRIBUTE_ELEMS: [ "TABLE", "TH", "TD", "HR", "PRE" ],
// These are the IDs and classes that readability sets itself.
IDS_TO_PRESERVE: [ "readability-content", "readability-page-1" ],
CLASSES_TO_PRESERVE: [ "readability-styled", "page" ],
/**
* Run any post-process modifications to article content as necessary.
*
@ -132,6 +137,9 @@ Readability.prototype = {
_postProcessContent: function(articleContent) {
// Relative URIs cannot be resolved once the article content is used on its
// own, so rewrite them to absolute URIs via _fixRelativeUris.
this._fixRelativeUris(articleContent);
// Strip id="" and class="" attributes from the article subtree; the
// preserved IDs and classes are decided by _cleanIDsAndClasses.
this._cleanIDsAndClasses(articleContent);
},
/**
@ -225,6 +233,38 @@ Readability.prototype = {
}));
},
/**
* Removes the id="" and class="" attribute from every element in the given
* subtree, except those that match IDS_TO_PRESERVE, CLASSES_TO_PRESERVE and
* the classesToPreserve array from the options object.
*
* @param Element
* @return void
*/
_cleanIDsAndClasses: function(node) {
if (this.IDS_TO_PRESERVE.indexOf(node.id) == -1) {
node.removeAttribute("id");
}
var classesToPreserve = this._classesToPreserve;
var className = node.className
.split(/\s+/)
.filter(function(cls) {
return classesToPreserve.indexOf(cls) != -1;
})
.join(" ");
if (className) {
node.className = className;
} else {
node.removeAttribute("class");
}
for (node = node.firstElementChild; node; node = node.nextElementSibling) {
this._cleanIDsAndClasses(node);
}
},
/**
* Converts each <a> and <img> uri in the given element to an absolute URI,
* ignoring #ref URIs.

Loading…
Cancel
Save