feat: added scoreContent function

pull/1/head
Adam Pash 8 years ago
parent bd7ed77f23
commit 44eae5e931

@ -1,4 +1,5 @@
Next: Work on score-content, making sure it's working as intended (seems to be)
Get better sense of when cheerio returns a raw node and when a cheerio object
- `extract` (this kicks it all off)
x `node_is_sufficient`
@ -14,7 +15,7 @@ x `_paragraphize`
x `_get_score`
x `_set_score`
x `_add_score`
- `_score_content`
x `_score_content`
x `_score_node`
x `_score_paragraph`
@ -23,3 +24,6 @@ x `_score_paragraph`
- `_find_top_candidate`
- `extract_clean_node`
- `_clean_conditionally`
Make sure weightNodes flag is being passed properly

@ -1,5 +1,6 @@
import assert from 'assert'
import cheerio from 'cheerio'
import fs from 'fs'
import {
clean
@ -11,19 +12,41 @@ import {
getScore
} from './index'
// TODO: Walk through these and sanity check my scores
// Commented out scores were what I expected, but I was also
// probably missing something when calculating
describe('scoreContent($, weightNodes)', () => {
it("loves hNews content", () => {
const $ = cheerio.load(HTML.hNews.before)
const result = scoreContent($).html()
assert.equal(getScore($('div').first(), $), 99)
assert.equal(getScore($('div').first(), $), 110)
// assert.equal(getScore($('div').first(), $), 99)
})
it("is so-so about non-hNews content", () => {
const $ = cheerio.load(HTML.nonHNews.before)
const result = scoreContent($).html()
assert.equal(getScore($('div').first(), $), 38)
// assert.equal(getScore($('div').first(), $), 38)
assert.equal(getScore($('div').first(), $), 60)
})
it("scores this Wired article the same", () => {
const html = fs.readFileSync('../fixtures/wired.html', 'utf-8')
const $ = cheerio.load(html)
const result = scoreContent($).html()
// assert.equal(getScore($('article').first(), $), 63.75)
assert.equal(getScore($('article').first(), $), 67)
})
// it("scores this NYT article the same", () => {
// const html = fs.readFileSync('../fixtures/nytimes.html', 'utf-8')
// const $ = cheerio.load(html)
// const result = scoreContent($).html()
//
// assert.equal(getScore($('div').first(), $), 385)
// })
})

@ -10,6 +10,9 @@ import {
export default function scoreNode(node) {
const { tagName } = node.get(0)
// TODO: Consider ordering by most likely.
// E.g., if divs are a more common tag on a page,
// Could save doing that regex test on every node AP
if (PARAGRAPH_SCORE_TAGS.test(tagName)) {
return scoreParagraph(node)
} else if (tagName === 'div') {

Loading…
Cancel
Save