From 1a61a23f6866e26f2c0fb207528539e925b91b85 Mon Sep 17 00:00:00 2001 From: Dan Burzo Date: Wed, 5 Aug 2020 14:17:05 +0300 Subject: [PATCH] Readability on npm (#608) * Initial work on preparing Readability for npm * Adjust some require()s * Point package.json to index.js * Add Node.js instructions to README * Use ES6 in eslint --- .eslintrc.js | 3 +++ JSDOMParser.js | 10 +++++++++- README.md | 19 +++++++++++++------ Readability-readerable.js | 5 ++--- benchmarks/benchmarks.js | 8 +++----- index.js | 30 ++++-------------------------- package.json | 6 +++--- test/generate-testcase.js | 8 +++----- test/test-isProbablyReaderable.js | 4 ++-- test/test-jsdomparser.js | 3 +-- test/test-readability.js | 5 ++--- 11 files changed, 45 insertions(+), 56 deletions(-) diff --git a/.eslintrc.js b/.eslintrc.js index 9ab0905..117e6ca 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -1,6 +1,9 @@ "use strict"; module.exports = { + "parserOptions": { + "ecmaVersion": 6 + }, "rules": { // Braces only needed for multi-line arrow function blocks // "arrow-body-style": [2, "as-needed"], diff --git a/JSDOMParser.js b/JSDOMParser.js index c98560b..7bfa2ac 100644 --- a/JSDOMParser.js +++ b/JSDOMParser.js @@ -874,7 +874,11 @@ JSDOMParser.prototype = { error: function(m) { - dump("JSDOMParser error: " + m + "\n"); + if (typeof dump !== "undefined") { + dump("JSDOMParser error: " + m + "\n"); + } else if (typeof console !== "undefined") { + console.log("JSDOMParser error: " + m + "\n"); + } this.errorState += m + "\n"; }, @@ -1186,3 +1190,7 @@ global.JSDOMParser = JSDOMParser; })(this); + +if (typeof module === "object") { + module.exports = this.JSDOMParser; +} diff --git a/README.md b/README.md index 6db3c92..c908d2d 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ A standalone version of the readability library used for Firefox Reader View. -## Usage on the web. +## Usage on the web To parse a document, you must create a new `Readability` object from a DOM document object, and then call `parse()`. Here's an example: @@ -32,10 +32,16 @@ var documentClone = document.cloneNode(true); var article = new Readability(documentClone).parse(); ``` -## Usage from node.js +## Usage from Node.js -In node.js, you won't generally have a DOM document object. To obtain one, you can use external -libraries like [jsdom](https://github.com/tmpvar/jsdom). While this repository contains a parser of +Readability is available on npm: + +```bash +npm install @mozilla/readability +``` + +In Node.js, you won't generally have a DOM document object. To obtain one, you can use external +libraries like [jsdom](https://github.com/jsdom/jsdom). While this repository contains a parser of its own (`JSDOMParser`), that is restricted to reading XML-compatible markup and therefore we do not recommend it for general use. @@ -45,10 +51,11 @@ property of the `options` object you pass the `JSDOM` constructor. ### Example: -``` +```js +var { Readability } = require('@mozilla/readability'); var JSDOM = require('jsdom').JSDOM; var doc = new JSDOM("Here's a bunch of text", { - url: "https://www.example.com/the-page-i-got-the-source-from", + url: "https://www.example.com/the-page-i-got-the-source-from" }); let reader = new Readability(doc.window.document); let article = reader.parse(); diff --git a/Readability-readerable.js b/Readability-readerable.js index 650f7f3..f5df709 100644 --- a/Readability-readerable.js +++ b/Readability-readerable.js @@ -1,5 +1,4 @@ /* eslint-env es6:false */ -/* globals exports */ /* * Copyright (c) 2010 Arc90 Inc * @@ -95,6 +94,6 @@ function isProbablyReaderable(doc, isVisible) { }); } -if (typeof exports === "object") { - exports.isProbablyReaderable = isProbablyReaderable; +if (typeof module === "object") { + module.exports = isProbablyReaderable; } diff --git a/benchmarks/benchmarks.js b/benchmarks/benchmarks.js index 1ff4eba..2d4dee3 100644 --- a/benchmarks/benchmarks.js +++ b/benchmarks/benchmarks.js @@ -1,10 +1,8 @@ var getTestPages = require("../test/utils").getTestPages; -var readability = require("../index.js"); -var readabilityCheck = require("../Readability-readerable.js"); +var { Readability, isProbablyReaderable } = require("../index"); var JSDOM = require("jsdom").JSDOM; -var Readability = readability.Readability; -var JSDOMParser = readability.JSDOMParser; +var JSDOMParser = require("../JSDOMParser"); var referenceTestPages = [ "002", @@ -62,7 +60,7 @@ suite("isProbablyReaderable perf", function () { url: uri, }).window.document; bench(testPage.dir + " readability perf", function() { - readabilityCheck.isProbablyReaderable(doc); + isProbablyReaderable(doc); }); }); }); diff --git a/index.js b/index.js index 8af525e..aed1ef3 100644 --- a/index.js +++ b/index.js @@ -1,29 +1,7 @@ -var path = require("path"); -var fs = require("fs"); -var url = require("url"); - -// We want to load Readability and JSDOMParser, which aren't set up as commonjs libraries, -// and so we need to do some hocus-pocus with 'vm' to import them on a separate scope -// (identical) scope context. -var vm = require("vm"); -var readabilityPath = path.join(__dirname, "Readability.js"); -var jsdomPath = path.join(__dirname, "JSDOMParser.js"); - - -var scopeContext = {}; -// We generally expect dump() and console.{whatever} to work, so make these available -// in the scope we're using: -scopeContext.dump = console.log; -scopeContext.console = console; -scopeContext.URL = url.URL; - -// Actually load files. NB: if either of the files has parse errors, -// node is dumb and shows you a syntax error *at this callsite* . Don't try to find -// a syntax error on this line, there isn't one. Go look in the file it's loading instead. -vm.runInNewContext(fs.readFileSync(jsdomPath), scopeContext, jsdomPath); -vm.runInNewContext(fs.readFileSync(readabilityPath), scopeContext, readabilityPath); +var Readability = require("./Readability"); +var isProbablyReaderable = require("./Readability-readerable"); module.exports = { - Readability: scopeContext.Readability, - JSDOMParser: scopeContext.JSDOMParser + Readability: Readability, + isProbablyReaderable: isProbablyReaderable }; diff --git a/package.json b/package.json index 4504e91..c6ce1cd 100644 --- a/package.json +++ b/package.json @@ -1,8 +1,8 @@ { - "name": "readability", + "name": "@mozilla/readability", "version": "0.2.0", "description": "A standalone version of the readability library used for Firefox Reader View.", - "main": "Readability.js", + "main": "index.js", "scripts": { "lint": "eslint .", "test": "mocha test/test-*.js", @@ -20,7 +20,7 @@ "url": "https://github.com/mozilla/readability/issues" }, "engines": { - "node": ">=7.0" + "node": ">=10.0.0" }, "homepage": "https://github.com/mozilla/readability", "devDependencies": { diff --git a/test/generate-testcase.js b/test/generate-testcase.js index a3691ba..77584c7 100644 --- a/test/generate-testcase.js +++ b/test/generate-testcase.js @@ -8,10 +8,8 @@ var http = require("http"); var urlparse = require("url").parse; var htmltidy = require("htmltidy2").tidy; -var readabilityCheck = require("../Readability-readerable"); -var readability = require("../index"); -var Readability = readability.Readability; -var JSDOMParser = readability.JSDOMParser; +var { Readability, isProbablyReaderable } = require("../index"); +var JSDOMParser = require("../JSDOMParser"); var FFX_UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:38.0) Gecko/20100101 Firefox/38.0"; @@ -132,7 +130,7 @@ function runReadability(source, destPath, metadataDestPath) { url: uri, }).window.document; myReader = new Readability(jsdomDoc); - readerable = readabilityCheck.isProbablyReaderable(jsdomDoc); + readerable = isProbablyReaderable(jsdomDoc); } catch (ex) { console.error(ex); ex.stack.forEach(console.log.bind(console)); diff --git a/test/test-isProbablyReaderable.js b/test/test-isProbablyReaderable.js index 8451c0f..5757c52 100644 --- a/test/test-isProbablyReaderable.js +++ b/test/test-isProbablyReaderable.js @@ -4,7 +4,7 @@ chai.config.includeStack = true; var expect = chai.expect; var testPages = require("./utils").getTestPages(); -var readabilityCheck = require("../Readability-readerable.js"); +var isProbablyReaderable = require("../index").isProbablyReaderable; describe("isProbablyReaderable - test pages", function() { testPages.forEach(function(testPage) { @@ -15,7 +15,7 @@ describe("isProbablyReaderable - test pages", function() { }).window.document; var expected = testPage.expectedMetadata.readerable; it("The result should " + (expected ? "" : "not ") + "be readerable", function() { - expect(readabilityCheck.isProbablyReaderable(doc)).eql(expected); + expect(isProbablyReaderable(doc)).eql(expected); }); }); }); diff --git a/test/test-jsdomparser.js b/test/test-jsdomparser.js index b52d546..982647f 100644 --- a/test/test-jsdomparser.js +++ b/test/test-jsdomparser.js @@ -2,8 +2,7 @@ var chai = require("chai"); chai.config.includeStack = true; var expect = chai.expect; -var readability = require("../index.js"); -var JSDOMParser = readability.JSDOMParser; +var JSDOMParser = require("../JSDOMParser"); var BASETESTCASE = '

Some text and a link

' + '
With a