feat: improve nymag.com extractor to grab deks from features

pull/3/head
Adam Pash 8 years ago
parent 21f444367f
commit 81ed4f00ed

@ -11,7 +11,8 @@
"./cleaners": "cleaners",
"./resource": "resource",
"./extractors": "extractors",
"./test-helpers.js": "test-helpers"
"./test-helpers.js": "test-helpers",
"./iris.js": "iris"
}
}]
]

@ -2,11 +2,11 @@ TODO:
- Complete response:
- add excerpt
- add word count
- add total pages
- add rendered pages
- Test if .is method is faster than regex methods
DONE:
x add total pages
x add rendered pages
x add canonicalUrl
x add domain
x Separate constants into activity-specific folders (dom, scoring)

File diff suppressed because one or more lines are too long

@ -38,6 +38,7 @@ const NYMagExtractor = {
title: {
selectors: [
'h1.lede-feature-title',
'h1.headline-primary',
'h1',
],
@ -46,6 +47,13 @@ const NYMagExtractor = {
author: {
selectors: [
'.by-authors',
'.lede-feature-author',
],
},
dek: {
selectors: [
'.lede-feature-teaser',
],
},

@ -0,0 +1,21 @@
import assert from 'assert'
import fs from 'fs'
import RootExtractor from 'extractors/root-extractor'
import Resource from 'extractors/root-extractor'
import Iris from 'iris'
import NYMagExtractor from 'extractors/custom/nymag.com'
// Checks that a saved nymag.com feature-story page is parsed into the
// expected dek, title and author.
describe('NYMagExtractor', () => {
  it('works with a feature story', async () => {
    // The fixture path is relative, so fs resolves it against the process
    // working directory. No encoding is passed, so `html` is a Buffer.
    // NOTE(review): confirm Iris.parse accepts a Buffer as well as a string.
    const html = fs.readFileSync('./fixtures/nymag.com/ailes.html')
    const uri = 'http://nymag.com/daily/intelligencer/2016/09/how-fox-news-women-took-down-roger-ailes.html'

    const { dek, title, author } = await Iris.parse(uri, html)

    const expectedDek = 'How Fox News women took down the most powerful, and predatory, man in media.'

    // strictEqual instead of the legacy loose `equal`, so a value that only
    // matches after `==` coercion cannot make the test pass.
    assert.strictEqual(dek, expectedDek)
    assert.strictEqual(title, 'The Revenge of Rogers Angels')
    assert.strictEqual(author, 'Gabriel Sherman')
  })
})
Loading…
Cancel
Save