chore: cleanup

Branch: pull/3/head
Author: Adam Pash (8 years ago)
parent b3481a2c45
commit 76df30e303

@@ -1,10 +1,10 @@
 TODO:
 - Complete response:
-- add excerpt
 - add word count
 - Test if .is method is faster than regex methods
 DONE:
+x add excerpt
 x add total pages
 x add rendered pages
 x add canonicalUrl

@@ -1,8 +1,8 @@
 {
-  "name": "js_read",
+  "name": "node-readability",
   "version": "1.0.0",
   "description": "",
-  "main": "index.js",
+  "main": "./dist/iris.js",
   "scripts": {
     "start": "node ./build",
     "lint": "eslint src/** --fix",

@@ -41,6 +41,7 @@
     "babel-polyfill": "^6.13.0",
     "cheerio": "^0.20.0",
     "difflib": "^0.2.4",
+    "ellipsize": "0.0.2",
     "moment": "^2.14.1",
     "request": "^2.74.0",
     "request-promise": "^4.1.1",

@@ -9,6 +9,6 @@ export default {
     babel(babelrc()),
   ],
   format: 'cjs',
-  dest: 'dist/bundle.js', // equivalent to --output
+  dest: 'dist/iris.js', // equivalent to --output
   sourceMap: true,
 }
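With this rename, Rollup's dest agrees with the main entry in package.json above (./dist/iris.js), so the published entry point and the emitted bundle are the same file.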

@@ -3,8 +3,6 @@ import { removeAnchor } from 'utils/text';
-import RootExtractor from 'extractors/root-extractor';
-import Resource from 'resource';
 import Iris from '../iris';

 export default async function collectAllPages(
   {
     nextPageUrl,

@@ -52,7 +50,6 @@ export default async function collectAllPages(
     };
     nextPageUrl = nextPageResult.nextPageUrl;
   }

   return {

@@ -1,21 +1,18 @@
-import assert from 'assert'
-import fs from 'fs'
+import assert from 'assert';
+import fs from 'fs';
-import RootExtractor from 'extractors/root-extractor'
-import Resource from 'extractors/root-extractor'
-import Iris from 'iris'
-import NYMagExtractor from 'extractors/custom/nymag.com'
+import Iris from 'iris';

 describe('NYMagExtractor', () => {
-  it('works with a feature story', async () => {
-    const html = fs.readFileSync('./fixtures/nymag.com/ailes.html')
-    const uri = 'http://nymag.com/daily/intelligencer/2016/09/how-fox-news-women-took-down-roger-ailes.html'
+  it('works with a feature story', async () => {
+    const html = fs.readFileSync('./fixtures/nymag.com/ailes.html');
+    const uri = 'http://nymag.com/daily/intelligencer/2016/09/how-fox-news-women-took-down-roger-ailes.html';

-    const { dek, title, author } = await Iris.parse(uri, html)
-    const actualDek = 'How Fox News women took down the most powerful, and predatory, man in media.'
+    const { dek, title, author } = await Iris.parse(uri, html);
+    const actualDek = 'How Fox News women took down the most powerful, and predatory, man in media.';

-    assert.equal(dek, actualDek)
-    assert.equal(title, 'The Revenge of Rogers Angels')
-    assert.equal(author, 'Gabriel Sherman')
-  })
-})
+    assert.equal(dek, actualDek);
+    assert.equal(title, 'The Revenge of Rogers Angels');
+    assert.equal(author, 'Gabriel Sherman');
+  });
+});
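Dropping the now-unused RootExtractor, Resource, and NYMagExtractor imports means the test drives the nymag.com extractor end-to-end through Iris.parse rather than reaching into extractor internals.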

@@ -1,4 +1,4 @@
-import ellipsize from 'ellipsize'
+import ellipsize from 'ellipsize';

 import {
   extractFromMeta,

@@ -7,9 +7,9 @@ import {
 import { EXCERPT_META_SELECTORS } from './constants';

-export function clean(content, $, maxLength=200) {
-  content = content.replace(/[\s\n]+/g, ' ').trim()
-  return ellipsize(content, 200, { ellipse: '…' })
+export function clean(content, $, maxLength = 200) {
+  content = content.replace(/[\s\n]+/g, ' ').trim();
+  return ellipsize(content, maxLength, { ellipse: '…' });
 }

 const GenericExcerptExtractor = {

@@ -19,10 +19,10 @@ const GenericExcerptExtractor = {
       return clean(stripTags(excerpt, $));
     }
     // Fall back to excerpting from the extracted content
-    const maxLength = 200
-    const shortContent = content.slice(0, maxLength * 5)
-    return clean($(shortContent).text(), $, maxLength)
-  }
-}
+    const maxLength = 200;
+    const shortContent = content.slice(0, maxLength * 5);
+    return clean($(shortContent).text(), $, maxLength);
+  },
+};

-export default GenericExcerptExtractor
+export default GenericExcerptExtractor;
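The substantive fix in clean() above is that maxLength was previously ignored in favor of a hardcoded 200. A quick sketch of the corrected behavior, assuming clean is imported from this module; the input is illustrative:

import { clean } from './extractor';

// The $ argument is unused inside clean() itself, so null is safe here.
const short = clean('word '.repeat(100), null, 50);
// short is now capped near 50 characters plus the trailing '…',
// where before the fix it would have run to 200.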

@@ -1,15 +1,15 @@
-import assert from 'assert'
-import cheerio from 'cheerio'
+import assert from 'assert';
+import cheerio from 'cheerio';

 import {
   default as GenericExcerptExtractor,
   clean,
-} from './extractor'
+} from './extractor';

 describe('GenericExcerptExtractor', () => {
   describe('extract({ $, content, metaCache })', () => {
     it('returns og:description', () => {
-      const actualExcerpt = "Wow this is going to be something good."
+      const actualExcerpt = 'Wow this is going to be something good.';
       const html = `
         <html>
           <head>
@@ -23,10 +23,10 @@ describe('GenericExcerptExtractor', () => {
       const excerpt = GenericExcerptExtractor.extract({ $, content: '', metaCache });

       assert.equal(excerpt, actualExcerpt);
-    })
+    });

     it('returns twitter:description', () => {
-      const actualExcerpt = "Wow this is going to be something good."
+      const actualExcerpt = 'Wow this is going to be something good.';
       const html = `
         <html>
           <head>

@@ -40,7 +40,7 @@ describe('GenericExcerptExtractor', () => {
       const excerpt = GenericExcerptExtractor.extract({ $, content: '', metaCache });

       assert.equal(excerpt, actualExcerpt);
-    })
+    });

     it('falls back to the content', () => {
       const html = `
@@ -50,16 +50,15 @@
         </html>
       `;
       const $ = cheerio.load(html);
-      const content = "<div><p>Wow <b>this</b> is going to be something good.</p></div>"
+      const content = '<div><p>Wow <b>this</b> is going to be something good.</p></div>';
       const metaCache = [];
       const excerpt = GenericExcerptExtractor.extract({ $, content, metaCache });

       assert.equal(excerpt, 'Wow this is going to be something good.');
-    })
-  })
-})
+    });
+  });
+});

 describe('clean(text)', () => {
   it('truncates text longer than 200 chars and trims whitespace', () => {

@@ -70,15 +69,15 @@ describe('clean(text)', () => {
       Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
       fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
       culpa qui officia deserunt mollit anim id est laborum.
-    `
-    const text = clean(longText)
+    `;
+    const text = clean(longText);

     let shouldBe = `
       Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
       incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud
       exercitation ullamco laboris nisi ut&hellip;
-    `
-    shouldBe = shouldBe.replace(/[\s\n]+/g, ' ').trim()
+    `;
+    shouldBe = shouldBe.replace(/[\s\n]+/g, ' ').trim();

-    assert.equal(text, shouldBe)
-  })
-})
+    assert.equal(text, shouldBe);
+  });
+});

@@ -7,7 +7,7 @@ const Iris = {
   async parse(url, html, opts = {}) {
     const { fetchAllPages = true } = opts || true;
     const Extractor = getExtractor(url);
-    console.log(`Using extractor for ${Extractor.domain}`);
+    // console.log(`Using extractor for ${Extractor.domain}`);
     const $ = await Resource.create(url, html);
     html = $.html();

@@ -29,7 +29,6 @@ const Iris = {
       $,
       metaCache,
       result,
-      Extractor,
       title,
       url,
     }

@@ -39,7 +38,7 @@ const Iris = {
       ...result,
       totalPages: 1,
       renderedPages: 1,
-    }
+    };
   }

   return result;
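For reference, a hedged sketch of the parse entry point after these edits; the import path assumes the ./dist/iris.js bundle named in package.json, and the URL is a placeholder:

import Iris from './dist/iris';

async function example() {
  // html may be pre-fetched markup; opts.fetchAllPages defaults to true.
  const result = await Iris.parse('http://example.com/story', null, { fetchAllPages: false });
  // With paging disabled, the single-page fallback above applies:
  // result.totalPages === 1 && result.renderedPages === 1
}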

@@ -41,10 +41,10 @@ describe('Iris', () => {
       { fetchAllPages: true }
     );

-    const { totalPages, pagesRendered } = result
+    const { totalPages, renderedPages } = result;

-    assert.equal(totalPages, 3)
-    assert.equal(pagesRendered, 3)
+    assert.equal(totalPages, 3);
+    assert.equal(renderedPages, 3);

     // console.log(result)
     assert.equal(result.nextPageUrl, `${url}2`);

@@ -18,7 +18,7 @@ export const STRIP_OUTPUT_TAGS = [
 export const REMOVE_ATTRS = ['style', 'align'];
 export const REMOVE_ATTR_SELECTORS = REMOVE_ATTRS.map(selector => `[${selector}]`);
 export const REMOVE_ATTR_LIST = REMOVE_ATTRS.join(',');
-export const WHITELIST_ATTRS = ['src', 'href', 'class', 'id', 'score'];
+export const WHITELIST_ATTRS = ['src', 'href', 'class', 'id'];
 export const WHITELIST_ATTRS_RE = new RegExp(`^(${WHITELIST_ATTRS.join('|')})$`, 'i');

 // removeEmpty
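Because WHITELIST_ATTRS_RE is built straight from the list (effectively /^(src|href|class|id)$/i after this change), removing 'score' means the internal scoring attribute no longer survives attribute whitelisting:

WHITELIST_ATTRS_RE.test('href');  // true, still whitelisted
WHITELIST_ATTRS_RE.test('score'); // false, now stripped like any other non-whitelisted attribute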

@@ -1,4 +1,4 @@
 #!/bin/bash

 # Runs the mocha tests
-mocha --compilers js:babel-register $(find src -name "*.test.js") --require babel-polyfill
+mocha --reporter spec --compilers js:babel-register $(find src -name "*.test.js") --require babel-polyfill
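spec is mocha's default reporter, so the added flag mainly makes the choice explicit and keeps output consistent wherever the script runs; the babel-register compiler, the babel-polyfill require, and the find over src for *.test.js files are unchanged.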
