Remove unused URI parameter from constructor

pull/440/merge
David A Roberts 6 years ago committed by Gijs
parent 5ee03bc960
commit d60184966c

@ -16,23 +16,14 @@ Please make sure to run [eslint](http://eslint.org/) against any proposed change
## Usage
To parse a document, you must create a new `Readability` object from a URI object and a document object, and then call `parse()`. Here's an example:
To parse a document, you must create a new `Readability` object from a document object, and then call `parse()`. Here's an example:
```javascript
var loc = document.location;
var uri = {
spec: loc.href,
host: loc.host,
prePath: loc.protocol + "//" + loc.host,
scheme: loc.protocol.substr(0, loc.protocol.indexOf(":")),
pathBase: loc.protocol + "//" + loc.host + loc.pathname.substr(0, loc.pathname.lastIndexOf("/") + 1)
};
var article = new Readability(uri, document).parse();
var article = new Readability(document).parse();
```
This `article` object will contain the following properties:
* `uri`: original `uri` object that was passed to constructor
* `title`: article title
* `content`: HTML string of processed article content
* `length`: length of article, in characters
@ -42,15 +33,6 @@ This `article` object will contain the following properties:
If you're using Readability on the web, you will likely be able to use a `document` reference from elsewhere (e.g. fetched via XMLHttpRequest, in a same-origin `<iframe>` you have access to, etc.). Otherwise, you would need to construct such an object using a DOM parser such as [jsdom](https://github.com/tmpvar/jsdom). While this repository contains a parser of its own (`JSDOMParser`), that is restricted to reading XML-compatible markup and therefore we do not recommend it for general use.
Until [#346](https://github.com/mozilla/readability/issues/346) is fixed, if you're using `jsdom` and `node` (rather than running in a browser), you will need to also ensure a global `Node` object is available for `Readability` to use, e.g. like so:
```javascript
...
var dom = new JSDOM(html, OPTIONS);
Node = dom.window.Node;
var article = new Readability(uri, dom.window.document).parse();
```
### Optional
Readability's `parse()` works by modifying the DOM. This removes some elements in the web page. You could avoid this by passing the clone of the `document` object while creating a `Readability` object.
@ -58,7 +40,7 @@ Readability's `parse()` works by modifying the DOM. This removes some elements i
```
var documentClone = document.cloneNode(true);
var article = new Readability(uri, documentClone).parse();
var article = new Readability(documentClone).parse();
```
## Tests

@ -22,14 +22,17 @@
/**
* Public constructor.
* @param {Object} uri The URI descriptor object.
* @param {HTMLDocument} doc The document to parse.
* @param {Object} options The options object.
*/
function Readability(uri, doc, options) {
function Readability(doc, options) {
// In some older versions, people passed a URI object as the first argument. Cope:
if (doc && !doc.documentElement && doc.spec) {
doc = options;
options = arguments[2];
}
options = options || {};
this._uri = uri;
this._doc = doc;
this._articleTitle = null;
this._articleByline = null;
@ -1748,7 +1751,6 @@ Readability.prototype = {
var textContent = articleContent.textContent;
return {
uri: this._uri,
title: this._articleTitle,
byline: metadata.byline || this._articleByline,
dir: this._articleDir,

@ -42,17 +42,10 @@ suite("Readability test page perf", function () {
set("iterations", 1);
set("type", "static");
var uri = {
spec: "http://fakehost/test/page.html",
host: "fakehost",
prePath: "http://fakehost",
scheme: "http",
pathBase: "http://fakehost/test"
};
testPages.forEach(function(testPage) {
var doc = new JSDOMParser().parse(testPage.source);
bench(testPage.dir + " readability perf", function() {
new Readability(uri, doc).parse();
new Readability(doc).parse();
});
});
});
@ -61,17 +54,10 @@ suite("isProbablyReaderable perf", function () {
set("iterations", 1);
set("type", "static");
var uri = {
spec: "http://fakehost/test/page.html",
host: "fakehost",
prePath: "http://fakehost",
scheme: "http",
pathBase: "http://fakehost/test"
};
testPages.forEach(function(testPage) {
var doc = new JSDOMParser().parse(testPage.source);
bench(testPage.dir + " readability perf", function() {
new Readability(uri, doc).isProbablyReaderable();
new Readability(doc).isProbablyReaderable();
});
});
});

@ -100,19 +100,13 @@ function onResponseReceived(source) {
}
function runReadability(source, destPath, metadataDestPath) {
var uri = {
spec: "http://fakehost/test/page.html",
host: "fakehost",
prePath: "http://fakehost",
scheme: "http",
pathBase: "http://fakehost/test/"
};
var doc = new JSDOMParser().parse(source, uri.spec);
var uri = "http://fakehost/test/page.html";
var doc = new JSDOMParser().parse(source, uri);
var myReader, result, readerable;
try {
// We pass `caption` as a class to check that passing in extra classes works,
// given that it appears in some of the test documents.
myReader = new Readability(uri, doc, { classesToPreserve: ["caption"] });
myReader = new Readability(doc, { classesToPreserve: ["caption"] });
result = myReader.parse();
} catch (ex) {
console.error(ex);
@ -126,7 +120,7 @@ function runReadability(source, destPath, metadataDestPath) {
ProcessExternalResources: false
}
});
myReader = new Readability(uri, jsdomDoc);
myReader = new Readability(jsdomDoc);
readerable = myReader.isProbablyReaderable();
} catch (ex) {
console.error(ex);
@ -144,7 +138,6 @@ function runReadability(source, destPath, metadataDestPath) {
}
// Delete the result data we don't care about checking.
delete result.uri;
delete result.content;
delete result.textContent;
delete result.length;

@ -49,7 +49,7 @@ function htmlTransform(str) {
return str.replace(/\s+/g, " ");
}
function runTestsWithItems(label, domGenerationFn, uri, source, expectedContent, expectedMetadata) {
function runTestsWithItems(label, domGenerationFn, source, expectedContent, expectedMetadata) {
describe(label, function() {
this.timeout(10000);
@ -60,7 +60,7 @@ function runTestsWithItems(label, domGenerationFn, uri, source, expectedContent,
var doc = domGenerationFn(source);
// Provide one class name to preserve, which we know appears in a few
// of the test documents.
var myReader = new Readability(uri, doc, { classesToPreserve: ["caption"] });
var myReader = new Readability(doc, { classesToPreserve: ["caption"] });
// Needs querySelectorAll function to test isProbablyReaderable method.
// jsdom implements querySelector but JSDOMParser doesn't.
var readerable = label === "jsdom" ? myReader.isProbablyReaderable() : null;
@ -191,18 +191,18 @@ function removeCommentNodesRecursively(node) {
describe("Readability API", function() {
describe("#constructor", function() {
it("should accept a debug option", function() {
expect(new Readability({}, {})._debug).eql(false);
expect(new Readability({}, {}, {debug: true})._debug).eql(true);
expect(new Readability({})._debug).eql(false);
expect(new Readability({}, {debug: true})._debug).eql(true);
});
it("should accept a nbTopCandidates option", function() {
expect(new Readability({}, {})._nbTopCandidates).eql(5);
expect(new Readability({}, {}, {nbTopCandidates: 42})._nbTopCandidates).eql(42);
expect(new Readability({})._nbTopCandidates).eql(5);
expect(new Readability({}, {nbTopCandidates: 42})._nbTopCandidates).eql(42);
});
it("should accept a maxElemsToParse option", function() {
expect(new Readability({}, {})._maxElemsToParse).eql(0);
expect(new Readability({}, {}, {maxElemsToParse: 42})._maxElemsToParse).eql(42);
expect(new Readability({})._maxElemsToParse).eql(0);
expect(new Readability({}, {maxElemsToParse: 42})._maxElemsToParse).eql(42);
});
});
@ -210,7 +210,7 @@ describe("Readability API", function() {
it("shouldn't parse oversized documents as per configuration", function() {
var doc = new JSDOMParser().parse("<html><div>yo</div></html>");
expect(function() {
new Readability({}, doc, {maxElemsToParse: 1}).parse();
new Readability(doc, {maxElemsToParse: 1}).parse();
}).to.Throw("Aborting parsing document; 2 elements found");
});
});
@ -219,17 +219,11 @@ describe("Readability API", function() {
describe("Test pages", function() {
testPages.forEach(function(testPage) {
describe(testPage.dir, function() {
var uri = {
spec: "http://fakehost/test/page.html",
host: "fakehost",
prePath: "http://fakehost",
scheme: "http",
pathBase: "http://fakehost/test/"
};
var uri = "http://fakehost/test/page.html";
runTestsWithItems("jsdom", function(source) {
var doc = jsdom(source, {
url: uri.spec,
url: uri,
features: {
FetchExternalResources: false,
ProcessExternalResources: false
@ -237,17 +231,17 @@ describe("Test pages", function() {
});
removeCommentNodesRecursively(doc);
return doc;
}, uri, testPage.source, testPage.expectedContent, testPage.expectedMetadata);
}, testPage.source, testPage.expectedContent, testPage.expectedMetadata);
runTestsWithItems("JSDOMParser", function(source) {
var parser = new JSDOMParser();
var doc = parser.parse(source, uri.spec);
var doc = parser.parse(source, uri);
if (parser.errorState) {
console.error("Parsing this DOM caused errors:", parser.errorState);
return null;
}
return doc;
}, uri, testPage.source, testPage.expectedContent, testPage.expectedMetadata);
}, testPage.source, testPage.expectedContent, testPage.expectedMetadata);
});
});
});

Loading…
Cancel
Save