You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
49 lines
1.2 KiB
JavaScript
49 lines
1.2 KiB
JavaScript
import { cleanAuthor } from 'cleaners';
|
|
import {
|
|
extractFromMeta,
|
|
extractFromSelectors,
|
|
} from 'utils/dom';
|
|
|
|
import {
|
|
AUTHOR_META_TAGS,
|
|
AUTHOR_MAX_LENGTH,
|
|
AUTHOR_SELECTORS,
|
|
BYLINE_SELECTORS_RE,
|
|
} from './constants';
|
|
|
|
/**
 * Site-agnostic author extractor.
 *
 * Tries three strategies in order of decreasing strictness and returns the
 * cleaned result of the first one that yields an acceptable candidate:
 *   1. meta tags listed in AUTHOR_META_TAGS,
 *   2. the selectors listed in AUTHOR_SELECTORS,
 *   3. the [selector, regex] pairs listed in BYLINE_SELECTORS_RE.
 */
const GenericAuthorExtractor = {
  /**
   * Find the author of a document.
   *
   * @param {Object} opts
   * @param {Function} opts.$ - Document query function, called as
   *   `$(selector)`; the result must expose `.length` and `.text()`
   *   (presumably a cheerio instance -- confirm against callers).
   * @param {*} opts.metaCache - Passed through unchanged to `extractFromMeta`.
   * @returns {*} Whatever `cleanAuthor` returns for the first acceptable
   *   candidate, or `null` when no strategy produces one.
   */
  extract({ $, metaCache }) {
    // A candidate is usable only when it is truthy and strictly shorter
    // than the configured cap.
    const isAcceptable = (candidate) =>
      Boolean(candidate) && candidate.length < AUTHOR_MAX_LENGTH;

    // First, check to see if we have a matching meta tag we can make use of.
    const fromMeta = extractFromMeta($, AUTHOR_META_TAGS, metaCache);
    if (isAcceptable(fromMeta)) {
      return cleanAuthor(fromMeta);
    }

    // Second, look through our selectors for potential authors.
    // NOTE(review): the meaning of the literal `2` is defined by
    // `extractFromSelectors` in utils/dom -- confirm there.
    const fromSelectors = extractFromSelectors($, AUTHOR_SELECTORS, 2);
    if (isAcceptable(fromSelectors)) {
      return cleanAuthor(fromSelectors);
    }

    // Last, use our looser regular-expression based selectors.
    for (const [selector, regex] of BYLINE_SELECTORS_RE) {
      const node = $(selector);

      // Only trust a selector that matches exactly one element.
      if (node.length !== 1) {
        continue;
      }

      const text = node.text();

      // Apply the same length cap as the stricter strategies above, so a
      // large container that merely starts like a byline is not returned
      // wholesale as the author.
      if (text.length < AUTHOR_MAX_LENGTH && regex.test(text)) {
        return cleanAuthor(text);
      }
    }

    return null;
  },
};
|
|
|
|
export default GenericAuthorExtractor;
|
|
|