Readability on npm (#608)

* Initial work on preparing Readability for npm

* Adjust some require()s

* Point package.json to index.js

* Add Node.js instructions to README

* Use ES6 in eslint
pull/618/head
Dan Burzo 4 years ago committed by GitHub
parent 59570ba7fc
commit 1a61a23f68
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,6 +1,9 @@
"use strict"; "use strict";
module.exports = { module.exports = {
"parserOptions": {
"ecmaVersion": 6
},
"rules": { "rules": {
// Braces only needed for multi-line arrow function blocks // Braces only needed for multi-line arrow function blocks
// "arrow-body-style": [2, "as-needed"], // "arrow-body-style": [2, "as-needed"],

@ -874,7 +874,11 @@
JSDOMParser.prototype = { JSDOMParser.prototype = {
error: function(m) { error: function(m) {
dump("JSDOMParser error: " + m + "\n"); if (typeof dump !== "undefined") {
dump("JSDOMParser error: " + m + "\n");
} else if (typeof console !== "undefined") {
console.log("JSDOMParser error: " + m + "\n");
}
this.errorState += m + "\n"; this.errorState += m + "\n";
}, },
@ -1186,3 +1190,7 @@
global.JSDOMParser = JSDOMParser; global.JSDOMParser = JSDOMParser;
})(this); })(this);
if (typeof module === "object") {
module.exports = this.JSDOMParser;
}

@ -2,7 +2,7 @@
A standalone version of the readability library used for Firefox Reader View. A standalone version of the readability library used for Firefox Reader View.
## Usage on the web. ## Usage on the web
To parse a document, you must create a new `Readability` object from a DOM document object, and then call `parse()`. Here's an example: To parse a document, you must create a new `Readability` object from a DOM document object, and then call `parse()`. Here's an example:
@ -32,10 +32,16 @@ var documentClone = document.cloneNode(true);
var article = new Readability(documentClone).parse(); var article = new Readability(documentClone).parse();
``` ```
## Usage from node.js ## Usage from Node.js
In node.js, you won't generally have a DOM document object. To obtain one, you can use external Readability is available on npm:
libraries like [jsdom](https://github.com/tmpvar/jsdom). While this repository contains a parser of
```bash
npm install @mozilla/readability
```
In Node.js, you won't generally have a DOM document object. To obtain one, you can use external
libraries like [jsdom](https://github.com/jsdom/jsdom). While this repository contains a parser of
its own (`JSDOMParser`), that is restricted to reading XML-compatible markup and therefore we do its own (`JSDOMParser`), that is restricted to reading XML-compatible markup and therefore we do
not recommend it for general use. not recommend it for general use.
@ -45,10 +51,11 @@ property of the `options` object you pass the `JSDOM` constructor.
### Example: ### Example:
``` ```js
var { Readability } = require('@mozilla/readability');
var JSDOM = require('jsdom').JSDOM; var JSDOM = require('jsdom').JSDOM;
var doc = new JSDOM("<body>Here's a bunch of text</body>", { var doc = new JSDOM("<body>Here's a bunch of text</body>", {
url: "https://www.example.com/the-page-i-got-the-source-from", url: "https://www.example.com/the-page-i-got-the-source-from"
}); });
let reader = new Readability(doc.window.document); let reader = new Readability(doc.window.document);
let article = reader.parse(); let article = reader.parse();

@ -1,5 +1,4 @@
/* eslint-env es6:false */ /* eslint-env es6:false */
/* globals exports */
/* /*
* Copyright (c) 2010 Arc90 Inc * Copyright (c) 2010 Arc90 Inc
* *
@ -95,6 +94,6 @@ function isProbablyReaderable(doc, isVisible) {
}); });
} }
if (typeof exports === "object") { if (typeof module === "object") {
exports.isProbablyReaderable = isProbablyReaderable; module.exports = isProbablyReaderable;
} }

@ -1,10 +1,8 @@
var getTestPages = require("../test/utils").getTestPages; var getTestPages = require("../test/utils").getTestPages;
var readability = require("../index.js"); var { Readability, isProbablyReaderable } = require("../index");
var readabilityCheck = require("../Readability-readerable.js");
var JSDOM = require("jsdom").JSDOM; var JSDOM = require("jsdom").JSDOM;
var Readability = readability.Readability; var JSDOMParser = require("../JSDOMParser");
var JSDOMParser = readability.JSDOMParser;
var referenceTestPages = [ var referenceTestPages = [
"002", "002",
@ -62,7 +60,7 @@ suite("isProbablyReaderable perf", function () {
url: uri, url: uri,
}).window.document; }).window.document;
bench(testPage.dir + " readability perf", function() { bench(testPage.dir + " readability perf", function() {
readabilityCheck.isProbablyReaderable(doc); isProbablyReaderable(doc);
}); });
}); });
}); });

@ -1,29 +1,7 @@
var path = require("path"); var Readability = require("./Readability");
var fs = require("fs"); var isProbablyReaderable = require("./Readability-readerable");
var url = require("url");
// We want to load Readability and JSDOMParser, which aren't set up as CommonJS libraries,
// so we need to do some hocus-pocus with 'vm' to import them into a separate scope
// context (the same context for both files).
var vm = require("vm");
var readabilityPath = path.join(__dirname, "Readability.js");
var jsdomPath = path.join(__dirname, "JSDOMParser.js");
var scopeContext = {};
// We generally expect dump() and console.{whatever} to work, so make these available
// in the scope we're using:
scopeContext.dump = console.log;
scopeContext.console = console;
scopeContext.URL = url.URL;
// Actually load files. NB: if either of the files has parse errors,
// node is dumb and shows you a syntax error *at this callsite*. Don't try to find
// a syntax error on this line; there isn't one. Go look in the file it's loading instead.
vm.runInNewContext(fs.readFileSync(jsdomPath), scopeContext, jsdomPath);
vm.runInNewContext(fs.readFileSync(readabilityPath), scopeContext, readabilityPath);
module.exports = { module.exports = {
Readability: scopeContext.Readability, Readability: Readability,
JSDOMParser: scopeContext.JSDOMParser isProbablyReaderable: isProbablyReaderable
}; };

@ -1,8 +1,8 @@
{ {
"name": "readability", "name": "@mozilla/readability",
"version": "0.2.0", "version": "0.2.0",
"description": "A standalone version of the readability library used for Firefox Reader View.", "description": "A standalone version of the readability library used for Firefox Reader View.",
"main": "Readability.js", "main": "index.js",
"scripts": { "scripts": {
"lint": "eslint .", "lint": "eslint .",
"test": "mocha test/test-*.js", "test": "mocha test/test-*.js",
@ -20,7 +20,7 @@
"url": "https://github.com/mozilla/readability/issues" "url": "https://github.com/mozilla/readability/issues"
}, },
"engines": { "engines": {
"node": ">=7.0" "node": ">=10.0.0"
}, },
"homepage": "https://github.com/mozilla/readability", "homepage": "https://github.com/mozilla/readability",
"devDependencies": { "devDependencies": {

@ -8,10 +8,8 @@ var http = require("http");
var urlparse = require("url").parse; var urlparse = require("url").parse;
var htmltidy = require("htmltidy2").tidy; var htmltidy = require("htmltidy2").tidy;
var readabilityCheck = require("../Readability-readerable"); var { Readability, isProbablyReaderable } = require("../index");
var readability = require("../index"); var JSDOMParser = require("../JSDOMParser");
var Readability = readability.Readability;
var JSDOMParser = readability.JSDOMParser;
var FFX_UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:38.0) Gecko/20100101 Firefox/38.0"; var FFX_UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:38.0) Gecko/20100101 Firefox/38.0";
@ -132,7 +130,7 @@ function runReadability(source, destPath, metadataDestPath) {
url: uri, url: uri,
}).window.document; }).window.document;
myReader = new Readability(jsdomDoc); myReader = new Readability(jsdomDoc);
readerable = readabilityCheck.isProbablyReaderable(jsdomDoc); readerable = isProbablyReaderable(jsdomDoc);
} catch (ex) { } catch (ex) {
console.error(ex); console.error(ex);
ex.stack.forEach(console.log.bind(console)); ex.stack.forEach(console.log.bind(console));

@ -4,7 +4,7 @@ chai.config.includeStack = true;
var expect = chai.expect; var expect = chai.expect;
var testPages = require("./utils").getTestPages(); var testPages = require("./utils").getTestPages();
var readabilityCheck = require("../Readability-readerable.js"); var isProbablyReaderable = require("../index").isProbablyReaderable;
describe("isProbablyReaderable - test pages", function() { describe("isProbablyReaderable - test pages", function() {
testPages.forEach(function(testPage) { testPages.forEach(function(testPage) {
@ -15,7 +15,7 @@ describe("isProbablyReaderable - test pages", function() {
}).window.document; }).window.document;
var expected = testPage.expectedMetadata.readerable; var expected = testPage.expectedMetadata.readerable;
it("The result should " + (expected ? "" : "not ") + "be readerable", function() { it("The result should " + (expected ? "" : "not ") + "be readerable", function() {
expect(readabilityCheck.isProbablyReaderable(doc)).eql(expected); expect(isProbablyReaderable(doc)).eql(expected);
}); });
}); });
}); });

@ -2,8 +2,7 @@ var chai = require("chai");
chai.config.includeStack = true; chai.config.includeStack = true;
var expect = chai.expect; var expect = chai.expect;
var readability = require("../index.js"); var JSDOMParser = require("../JSDOMParser");
var JSDOMParser = readability.JSDOMParser;
var BASETESTCASE = '<html><body><p>Some text and <a class="someclass" href="#">a link</a></p>' + var BASETESTCASE = '<html><body><p>Some text and <a class="someclass" href="#">a link</a></p>' +
'<div id="foo">With a <script>With &lt; fancy " characters in it because' + '<div id="foo">With a <script>With &lt; fancy " characters in it because' +

@ -4,9 +4,8 @@ var sinon = require("sinon");
chai.config.includeStack = true; chai.config.includeStack = true;
var expect = chai.expect; var expect = chai.expect;
var readability = require("../index"); var Readability = require("../index").Readability;
var Readability = readability.Readability; var JSDOMParser = require("../JSDOMParser");
var JSDOMParser = readability.JSDOMParser;
var testPages = require("./utils").getTestPages(); var testPages = require("./utils").getTestPages();

Loading…
Cancel
Save