diff --git a/Readability.js b/Readability.js index 2d94141..35e0d7d 100644 --- a/Readability.js +++ b/Readability.js @@ -577,10 +577,12 @@ Readability.prototype = { this._clean(articleContent, "link"); this._clean(articleContent, "aside"); - // Clean out elements have "share" in their id/class combinations from final top candidates, + // Clean out elements with little content that have "share" in their id/class combinations from final top candidates, // which means we don't remove the top candidates even they have "share". - this._forEachNode(articleContent.children, function(topCandidate) { - this._cleanMatchedNodes(topCandidate, /share/); + this._forEachNode(articleContent.children, function (topCandidate) { + this._cleanMatchedNodes(topCandidate, function (node, matchString) { + return /share/.test(matchString) && node.textContent.length < 500; + }); }); // If there is only one h2 and its text content substantially equals article title, @@ -1685,17 +1687,17 @@ Readability.prototype = { }, /** - * Clean out elements whose id/class combinations match specific string. + * Clean out elements that match the specified conditions * * @param Element - * @param RegExp match id/class combination. + * @param Function determines whether a node should be removed * @return void **/ - _cleanMatchedNodes: function(e, regex) { + _cleanMatchedNodes: function(e, filter) { var endOfSearchMarkerNode = this._getNextNode(e, true); var next = this._getNextNode(e); while (next && next != endOfSearchMarkerNode) { - if (regex.test(next.className + " " + next.id)) { + if (filter(next, next.className + " " + next.id)) { next = this._removeAndGetNext(next); } else { next = this._getNextNode(next);