You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
mercury-parser/src/extractors/generic/content/scoring/score-length.js

27 lines
622 B
JavaScript

// Matches exactly the tag names `p` or `pre` (case-insensitive); these tags
// receive the larger deduction in scoreLength below.
const idkRe = /^(p|pre)$/i

/**
 * Computes a length-based score bonus for a block of text.
 *
 * The text length is split into 50-character "chunks"; a tag-dependent
 * deduction is subtracted from the chunk count and the result is clamped
 * to the range [0, 3].
 *
 * @param {number} textLength - Length of the text being scored.
 * @param {string} [tagName='p'] - Tag name of the node holding the text.
 *   `p` and `pre` (any letter case) are deducted 2 chunks; every other tag
 *   is deducted 1.25 chunks.
 * @returns {number} A score between 0 and 3 inclusive. Returns 0 when
 *   `textLength` is zero, negative, or not a number.
 */
export default function scoreLength(textLength, tagName = 'p') {
  const chunks = textLength / 50

  // Written as a negated `> 0` on purpose: the comparison is also false for
  // NaN, so a non-numeric length scores 0 instead of propagating NaN.
  if (!(chunks > 0)) {
    return 0
  }

  // NOTE(review): the rationale for tamping down p/pre more than other tags
  // is not documented; the values follow the implementation this was ported
  // from. The original author also noted that tagName may be unnecessary,
  // since this is reportedly only called from scoreParagraph — confirm
  // against callers before changing it.
  const deduction = idkRe.test(tagName) ? 2 : 1.25

  return Math.min(Math.max(chunks - deduction, 0), 3)
}