Refactor: running tests more efficiently (#49)

Only running one parser per page we're testing rather than a parser per field we're testing.
pull/51/head
Adam Pash 8 years ago committed by GitHub
parent edcb7295d1
commit 15656cb3e1

@ -20,10 +20,12 @@ var asyncToGenerator = _interopDefault(require('babel-runtime/helpers/asyncToGen
var cheerio = _interopDefault(require('cheerio'));
var promise = _interopDefault(require('babel-runtime/core-js/promise'));
var request = _interopDefault(require('request'));
var iconvLite = _interopDefault(require('iconv-lite'));
var keys = _interopDefault(require('babel-runtime/core-js/object/keys'));
var stringDirection = _interopDefault(require('string-direction'));
var validUrl = _interopDefault(require('valid-url'));
var moment = _interopDefault(require('moment'));
var momentTimezone = _interopDefault(require('moment-timezone'));
var momentParseformat = _interopDefault(require('moment-parseformat'));
var wuzzy = _interopDefault(require('wuzzy'));
var difflib = _interopDefault(require('difflib'));
var from = _interopDefault(require('babel-runtime/core-js/array/from'));
@ -31,7 +33,7 @@ var ellipsize = _interopDefault(require('ellipsize'));
var _taggedTemplateLiteral = _interopDefault(require('babel-runtime/helpers/taggedTemplateLiteral'));
// Spacer images to be removed
var SPACER_RE = new RegExp('trans|transparent|spacer|blank', 'i');
var SPACER_RE = new RegExp('transparent|spacer|blank', 'i');
// The class we will use to mark elements we want to keep
// but would normally remove
@ -508,7 +510,6 @@ function getWeight(node) {
// the node's score attribute
// returns null if no score set
function getScore($node) {
  // Read the `score` attribute via the node's attr() accessor and parse it
  // as a float.
  //
  // $node: a wrapped node exposing attr(name).
  // Returns the parsed number, or null when the attribute is missing or
  // not numeric. A parsed value of 0 is falsy, so it is reported as null
  // as well.
  return parseFloat($node.attr('score')) || null;
}
@ -724,6 +725,8 @@ var HAS_ALPHA_RE = /[a-z]/i;
var IS_ALPHA_RE = /^[a-z]+$/i;
var IS_DIGIT_RE = /^[0-9]+$/i;
var ENCODING_RE = /charset=([\w-]+)\b/;
function isGoodSegment(segment, index, firstSegmentHasLetters) {
var goodSegment = true;
@ -900,7 +903,6 @@ function removeUnlessContent($node, $, weight) {
}
}
/* eslint-disable */
function absolutize($, rootUrl, attr, $content) {
$('[' + attr + ']', $content).each(function (_, node) {
var attrs = getAttrs(node);
@ -1013,14 +1015,15 @@ function setAttr(node, attr, val) {
return node;
}
/* eslint-disable */
function setAttrs(node, attrs) {
if (node.attribs) {
node.attribs = attrs;
} else if (node.attributes) {
while (node.attributes.length > 0) {
node.removeAttribute(node.attributes[0].name);
}_Reflect$ownKeys(attrs).forEach(function (key) {
}
_Reflect$ownKeys(attrs).forEach(function (key) {
node.setAttribute(key, attrs[key]);
});
}
@ -1030,17 +1033,62 @@ function setAttrs(node, attrs) {
// DOM manipulation
function _interopDefault$1(ex){return ex&&(typeof ex==='undefined'?'undefined':_typeof(ex))==='object'&&'default'in ex?ex['default']:ex;}var _regeneratorRuntime=_interopDefault$1(regenerator);var _extends$1=_interopDefault$1(_extends);var _asyncToGenerator=_interopDefault$1(asyncToGenerator);var URL$1=_interopDefault$1(URL);var cheerio$1=_interopDefault$1(cheerio);var _Promise=_interopDefault$1(promise);var request$1=_interopDefault$1(request);var _Reflect$ownKeys$1=_interopDefault$1(_Reflect$ownKeys);var _toConsumableArray$1=_interopDefault$1(_toConsumableArray);var _defineProperty$1=_interopDefault$1(_defineProperty);var _slicedToArray$1=_interopDefault$1(_slicedToArray);var _typeof$1=_interopDefault$1(_typeof);var _getIterator$1=_interopDefault$1(_getIterator);var _Object$keys=_interopDefault$1(keys);var stringDirection$1=_interopDefault$1(stringDirection);var validUrl$1=_interopDefault$1(validUrl);var moment$1=_interopDefault$1(moment);var wuzzy$1=_interopDefault$1(wuzzy);var difflib$1=_interopDefault$1(difflib);var _Array$from=_interopDefault$1(from);var ellipsize$1=_interopDefault$1(ellipsize);var _marked=[range].map(_regeneratorRuntime.mark);function range(){var start=arguments.length>0&&arguments[0]!==undefined?arguments[0]:1;var end=arguments.length>1&&arguments[1]!==undefined?arguments[1]:1;return _regeneratorRuntime.wrap(function range$(_context){while(1){switch(_context.prev=_context.next){case 0:if(!(start<=end)){_context.next=5;break;}_context.next=3;return start+=1;case 3:_context.next=0;break;case 5:case"end":return _context.stop();}}},_marked[0],this);}// extremely simple url validation as a first step
function _interopDefault$1(ex){return ex&&(typeof ex==='undefined'?'undefined':_typeof(ex))==='object'&&'default'in ex?ex['default']:ex;}var _regeneratorRuntime=_interopDefault$1(regenerator);var _extends$1=_interopDefault$1(_extends);var _asyncToGenerator=_interopDefault$1(asyncToGenerator);var URL$1=_interopDefault$1(URL);var cheerio$1=_interopDefault$1(cheerio);var _Promise=_interopDefault$1(promise);var request$1=_interopDefault$1(request);var iconv=_interopDefault$1(iconvLite);var _slicedToArray$1=_interopDefault$1(_slicedToArray);var _Reflect$ownKeys$1=_interopDefault$1(_Reflect$ownKeys);var _toConsumableArray$1=_interopDefault$1(_toConsumableArray);var _defineProperty$1=_interopDefault$1(_defineProperty);var _typeof$1=_interopDefault$1(_typeof);var _getIterator$1=_interopDefault$1(_getIterator);var _Object$keys=_interopDefault$1(keys);var stringDirection$1=_interopDefault$1(stringDirection);var validUrl$1=_interopDefault$1(validUrl);var moment=_interopDefault$1(momentTimezone);var parseFormat=_interopDefault$1(momentParseformat);var wuzzy$1=_interopDefault$1(wuzzy);var difflib$1=_interopDefault$1(difflib);var _Array$from=_interopDefault$1(from);var ellipsize$1=_interopDefault$1(ellipsize);var _marked=[range].map(_regeneratorRuntime.mark);function range(){var start=arguments.length>0&&arguments[0]!==undefined?arguments[0]:1;var end=arguments.length>1&&arguments[1]!==undefined?arguments[1]:1;return _regeneratorRuntime.wrap(function range$(_context){while(1){switch(_context.prev=_context.next){case 0:if(!(start<=end)){_context.next=5;break;}_context.next=3;return start+=1;case 3:_context.next=0;break;case 5:case"end":return _context.stop();}}},_marked[0],this);}// extremely simple url validation as a first step
function validateUrl(_ref){var hostname=_ref.hostname;// If this isn't a valid url, return an error message
return!!hostname;}var Errors={badUrl:{error:true,messages:'The url parameter passed does not look like a valid URL. Please check your data and try again.'}};var REQUEST_HEADERS={'User-Agent':'Readability - http://readability.com/about/'};// The number of milliseconds to attempt to fetch a resource before timing out.
return!!hostname;}var Errors={badUrl:{error:true,messages:'The url parameter passed does not look like a valid URL. Please check your data and try again.'}};var NORMALIZE_RE$1=/\s{2,}/g;function normalizeSpaces$1(text){return text.replace(NORMALIZE_RE$1,' ').trim();}// Given a node type to search for, and a list of regular expressions,
// look to see if this extraction can be found in the URL. Expects
// that each expression in r_list will return group(1) as the proper
// string to be cleaned.
// Only used for date_published currently.
function extractFromUrl$1(url,regexList){var matchRe=regexList.find(function(re){return re.test(url);});if(matchRe){return matchRe.exec(url)[1];}return null;}// An expression that looks to try to find the page digit within a URL, if
// it exists.
// Matches:
// page=1
// pg=1
// p=1
// paging=12
// pag=7
// pagination/1
// paging/88
// pa/83
// p/11
//
// Does not match:
// pg=102
// page:2
var PAGE_IN_HREF_RE$1=new RegExp('(page|paging|(p(a|g|ag)?(e|enum|ewanted|ing|ination)))?(=|/)([0-9]{1,3})','i');var HAS_ALPHA_RE$1=/[a-z]/i;var IS_ALPHA_RE$1=/^[a-z]+$/i;var IS_DIGIT_RE$1=/^[0-9]+$/i;var ENCODING_RE$1=/charset=([\w-]+)\b/;function pageNumFromUrl$1(url){var matches=url.match(PAGE_IN_HREF_RE$1);if(!matches)return null;var pageNum=parseInt(matches[6],10);// Return pageNum < 100, otherwise
// return null
return pageNum<100?pageNum:null;}function removeAnchor$1(url){return url.split('#')[0].replace(/\/$/,'');}function isGoodSegment$1(segment,index,firstSegmentHasLetters){var goodSegment=true;// If this is purely a number, and it's the first or second
// url_segment, it's probably a page number. Remove it.
if(index<2&&IS_DIGIT_RE$1.test(segment)&&segment.length<3){goodSegment=true;}// If this is the first url_segment and it's just "index",
// remove it
if(index===0&&segment.toLowerCase()==='index'){goodSegment=false;}// If our first or second url_segment is smaller than 3 characters,
// and the first url_segment had no alphas, remove it.
if(index<2&&segment.length<3&&!firstSegmentHasLetters){goodSegment=false;}return goodSegment;}// Take a URL, and return the article base of said URL. That is, no
// pagination data exists in it. Useful for comparing to other links
// that might have pagination data within them.
function articleBaseUrl$1(url,parsed){var parsedUrl=parsed||URL$1.parse(url);var protocol=parsedUrl.protocol,host=parsedUrl.host,path=parsedUrl.path;var firstSegmentHasLetters=false;var cleanedSegments=path.split('/').reverse().reduce(function(acc,rawSegment,index){var segment=rawSegment;// Split off and save anything that looks like a file type.
if(segment.includes('.')){var _segment$split=segment.split('.'),_segment$split2=_slicedToArray$1(_segment$split,2),possibleSegment=_segment$split2[0],fileExt=_segment$split2[1];if(IS_ALPHA_RE$1.test(fileExt)){segment=possibleSegment;}}// If our first or second segment has anything looking like a page
// number, remove it.
if(PAGE_IN_HREF_RE$1.test(segment)&&index<2){segment=segment.replace(PAGE_IN_HREF_RE$1,'');}// If we're on the first segment, check to see if we have any
// characters in it. The first segment is actually the last bit of
// the URL, and this will be helpful to determine if we're on a URL
// segment that looks like "/2/" for example.
if(index===0){firstSegmentHasLetters=HAS_ALPHA_RE$1.test(segment);}// If it's not marked for deletion, push it to cleaned_segments.
if(isGoodSegment$1(segment,index,firstSegmentHasLetters)){acc.push(segment);}return acc;},[]);return protocol+'//'+host+cleanedSegments.reverse().join('/');}// Given a string, return True if it appears to have an ending sentence
// within it, false otherwise.
var SENTENCE_END_RE$1=new RegExp('.( |$)');function hasSentenceEnd$1(text){return SENTENCE_END_RE$1.test(text);}function excerptContent$1(content){var words=arguments.length>1&&arguments[1]!==undefined?arguments[1]:10;return content.trim().split(/\s+/).slice(0,words).join(' ');}// check a string for encoding; this is
// used in our fetchResource function to
// ensure correctly encoded responses
function getEncoding$1(str){if(ENCODING_RE$1.test(str)){return ENCODING_RE$1.exec(str)[1];}return null;}// Browser does not like us setting user agent
var REQUEST_HEADERS=cheerio$1.browser?{}:{'User-Agent':'Mercury - https://mercury.postlight.com/web-parser/'};// The number of milliseconds to attempt to fetch a resource before timing out.
var FETCH_TIMEOUT=10000;// Content types that we do not extract content from
var BAD_CONTENT_TYPES=['audio/mpeg','image/gif','image/jpeg','image/jpg'];var BAD_CONTENT_TYPES_RE=new RegExp('^('+BAD_CONTENT_TYPES.join('|')+')$','i');// Use this setting as the maximum size an article can be
// for us to attempt parsing. Defaults to 5 MB.
var MAX_CONTENT_LENGTH=5242880;// Turn the global proxy on or off
// Proxying is not currently enabled in Python source
// so not implementing logic in port.
function get(options){// eslint-disable-line
return new _Promise(function(resolve,reject){request$1(options,function(err,response,body){if(err){reject(err);}else{resolve({body:body,response:response});}});});}// Evaluate a response to ensure it's something we should be keeping.
function get(options){return new _Promise(function(resolve,reject){request$1(options,function(err,response,body){if(err){reject(err);}else{var encoding=getEncoding$1(response.headers['content-type']);if(iconv.encodingExists(encoding)){body=iconv.decode(body,encoding);}resolve({body:body,response:response});}});});}// Evaluate a response to ensure it's something we should be keeping.
// This does not validate in the sense of a response being 200 level or
// not. Validation here means that we haven't found reason to bail from
// further processing of this url.
@ -1059,7 +1107,7 @@ if(contentLength>MAX_CONTENT_LENGTH){throw new Error('Content for this resource
// proper exceptions on the many failure cases of HTTP.
// TODO: Ensure we are not fetching something enormous. Always return
// unicode content for HTML, with charset conversion.
var fetchResource$1=function(){var _ref2=_asyncToGenerator(_regeneratorRuntime.mark(function _callee(url,parsedUrl){var options,_ref3,response,body;return _regeneratorRuntime.wrap(function _callee$(_context){while(1){switch(_context.prev=_context.next){case 0:parsedUrl=parsedUrl||URL$1.parse(encodeURI(url));options={url:parsedUrl,headers:_extends$1({},REQUEST_HEADERS),timeout:FETCH_TIMEOUT,// Don't set encoding; fixes issues
var fetchResource$1=function(){var _ref2=_asyncToGenerator(_regeneratorRuntime.mark(function _callee(url,parsedUrl){var options,_ref3,response,body;return _regeneratorRuntime.wrap(function _callee$(_context){while(1){switch(_context.prev=_context.next){case 0:parsedUrl=parsedUrl||URL$1.parse(encodeURI(url));options={url:parsedUrl.href,headers:_extends$1({},REQUEST_HEADERS),timeout:FETCH_TIMEOUT,// Don't set encoding; fixes issues
// w/gzipped responses
encoding:null,// Accept cookies
jar:true,// Accept and decode gzip
@ -1071,7 +1119,7 @@ followAllRedirects:true};_context.next=4;return get(options);case 4:_ref3=_conte
// In addition, normalize 'property' attributes to 'name' for ease of
// querying later. See, e.g., og or twitter meta tags.
function normalizeMetaTags($){$=convertMetaProp($,'content','value');$=convertMetaProp($,'property','name');return $;}// Spacer images to be removed
var SPACER_RE$1=new RegExp('trans|transparent|spacer|blank','i');// The class we will use to mark elements we want to keep
var SPACER_RE$1=new RegExp('transparent|spacer|blank','i');// The class we will use to mark elements we want to keep
// but would normally remove
var KEEP_CLASS$1='mercury-parser-keep';var KEEP_SELECTORS$1=['iframe[src^="https://www.youtube.com"]','iframe[src^="http://www.youtube.com"]','iframe[src^="https://player.vimeo"]','iframe[src^="http://player.vimeo"]'];// A list of tags to strip from the output if we encounter them.
var STRIP_OUTPUT_TAGS$1=['title','script','noscript','link','style','hr','embed','iframe','object'];// cleanAttributes
@ -1185,7 +1233,8 @@ while(sibling&&!(sibling.tagName&&BLOCK_LEVEL_TAGS_RE$2.test(sibling.tagName))){
// :param $: A cheerio object to search
// :return cheerio object with new p elements
// (By-reference mutation, though. Returned just for convenience.)
function convertToParagraphs$$1($){$=brsToPs$$1($);$=convertDivs$1($);$=convertSpans$2($);return $;}function convertNodeTo$$1($node,$){var tag=arguments.length>2&&arguments[2]!==undefined?arguments[2]:'p';var node=$node.get(0);if(!node){return $;}var attrs=getAttrs$1(node)||{};var attribString=_Reflect$ownKeys$1(attrs).map(function(key){return key+'='+attrs[key];}).join(' ');var html=void 0;if($.browser){// In the browser, the contents of noscript tags aren't rendered, therefore
function convertToParagraphs$$1($){$=brsToPs$$1($);$=convertDivs$1($);$=convertSpans$2($);return $;}function convertNodeTo$$1($node,$){var tag=arguments.length>2&&arguments[2]!==undefined?arguments[2]:'p';var node=$node.get(0);if(!node){return $;}var attrs=getAttrs$1(node)||{};// console.log(attrs)
var attribString=_Reflect$ownKeys$1(attrs).map(function(key){return key+'='+attrs[key];}).join(' ');var html=void 0;if($.browser){// In the browser, the contents of noscript tags aren't rendered, therefore
// transforms on the noscript tag (commonly used for lazy-loading) don't work
// as expected. This test case handles that
html=node.tagName.toLowerCase()==='noscript'?$node.text():$node.html();}else{html=$node.contents();}$node.replaceWith('<'+tag+' '+attribString+'>'+html+'</'+tag+'>');return $;}function cleanForHeight$1($img,$){var height=parseInt($img.attr('height'),10);var width=parseInt($img.attr('width'),10)||20;// Remove images that explicitly have very small heights or
@ -1279,8 +1328,7 @@ if(PHOTO_HINTS_RE$1$1.test(classes)){score+=10;}// add 25 if class matches entry
if(READABILITY_ASSET$1$1.test(classes)){score+=25;}}return score;}// returns the score of a node based on
// the node's score attribute
// returns null if no score set
function getScore$1($node){// console.log("NODE", $node, $node.attr('score'))
return parseFloat($node.attr('score'))||null;}// return 1 for every comma in text
function getScore$1($node){return parseFloat($node.attr('score'))||null;}// return 1 for every comma in text
function scoreCommas$1(text){return(text.match(/,/g)||[]).length;}var idkRe$1=new RegExp('^(p|pre)$','i');function scoreLength$1(textLength){var tagName=arguments.length>1&&arguments[1]!==undefined?arguments[1]:'p';var chunks=textLength/50;if(chunks>0){var lengthBonus=void 0;// No idea why p or pre are being tamped down here
// but just following the source for now
// Not even sure why tagName is included here,
@ -1320,50 +1368,7 @@ HNEWS_CONTENT_SELECTORS$1$1.forEach(function(_ref){var _ref2=_slicedToArray$1(_r
// in which parents weren't retaining
// scores. This is not ideal, and
// should be fixed.
scorePs$1($,weightNodes);scorePs$1($,weightNodes);return $;}var NORMALIZE_RE$1=/\s{2,}/g;function normalizeSpaces$1(text){return text.replace(NORMALIZE_RE$1,' ').trim();}// Given a node type to search for, and a list of regular expressions,
// look to see if this extraction can be found in the URL. Expects
// that each expression in r_list will return group(1) as the proper
// string to be cleaned.
// Only used for date_published currently.
function extractFromUrl$1(url,regexList){var matchRe=regexList.find(function(re){return re.test(url);});// const matchRe = null
if(matchRe){return matchRe.exec(url)[1];}return null;}// An expression that looks to try to find the page digit within a URL, if
// it exists.
// Matches:
// page=1
// pg=1
// p=1
// paging=12
// pag=7
// pagination/1
// paging/88
// pa/83
// p/11
//
// Does not match:
// pg=102
// page:2
var PAGE_IN_HREF_RE$1=new RegExp('(page|paging|(p(a|g|ag)?(e|enum|ewanted|ing|ination)))?(=|/)([0-9]{1,3})','i');var HAS_ALPHA_RE$1=/[a-z]/i;var IS_ALPHA_RE$1=/^[a-z]+$/i;var IS_DIGIT_RE$1=/^[0-9]+$/i;function pageNumFromUrl$1(url){var matches=url.match(PAGE_IN_HREF_RE$1);if(!matches)return null;var pageNum=parseInt(matches[6],10);// Return pageNum < 100, otherwise
// return null
return pageNum<100?pageNum:null;}function removeAnchor$1(url){return url.split('#')[0].replace(/\/$/,'');}function isGoodSegment$1(segment,index,firstSegmentHasLetters){var goodSegment=true;// If this is purely a number, and it's the first or second
// url_segment, it's probably a page number. Remove it.
if(index<2&&IS_DIGIT_RE$1.test(segment)&&segment.length<3){goodSegment=true;}// If this is the first url_segment and it's just "index",
// remove it
if(index===0&&segment.toLowerCase()==='index'){goodSegment=false;}// If our first or second url_segment is smaller than 3 characters,
// and the first url_segment had no alphas, remove it.
if(index<2&&segment.length<3&&!firstSegmentHasLetters){goodSegment=false;}return goodSegment;}// Take a URL, and return the article base of said URL. That is, no
// pagination data exists in it. Useful for comparing to other links
// that might have pagination data within them.
function articleBaseUrl$1(url,parsed){var parsedUrl=parsed||URL$1.parse(url);var protocol=parsedUrl.protocol,host=parsedUrl.host,path=parsedUrl.path;var firstSegmentHasLetters=false;var cleanedSegments=path.split('/').reverse().reduce(function(acc,rawSegment,index){var segment=rawSegment;// Split off and save anything that looks like a file type.
if(segment.includes('.')){var _segment$split=segment.split('.'),_segment$split2=_slicedToArray$1(_segment$split,2),possibleSegment=_segment$split2[0],fileExt=_segment$split2[1];if(IS_ALPHA_RE$1.test(fileExt)){segment=possibleSegment;}}// If our first or second segment has anything looking like a page
// number, remove it.
if(PAGE_IN_HREF_RE$1.test(segment)&&index<2){segment=segment.replace(PAGE_IN_HREF_RE$1,'');}// If we're on the first segment, check to see if we have any
// characters in it. The first segment is actually the last bit of
// the URL, and this will be helpful to determine if we're on a URL
// segment that looks like "/2/" for example.
if(index===0){firstSegmentHasLetters=HAS_ALPHA_RE$1.test(segment);}// If it's not marked for deletion, push it to cleaned_segments.
if(isGoodSegment$1(segment,index,firstSegmentHasLetters)){acc.push(segment);}return acc;},[]);return protocol+'//'+host+cleanedSegments.reverse().join('/');}// Given a string, return True if it appears to have an ending sentence
// within it, false otherwise.
var SENTENCE_END_RE$1=new RegExp('.( |$)');function hasSentenceEnd$1(text){return SENTENCE_END_RE$1.test(text);}function excerptContent$1(content){var words=arguments.length>1&&arguments[1]!==undefined?arguments[1]:10;return content.trim().split(/\s+/).slice(0,words).join(' ');}// Now that we have a top_candidate, look through the siblings of
scorePs$1($,weightNodes);scorePs$1($,weightNodes);return $;}// Now that we have a top_candidate, look through the siblings of
// it to see if any of them are decently scored. If they are, they
// may be split parts of the content (Like two divs, a preamble and
// a body.) Example:
@ -1418,7 +1423,7 @@ if(getWeight$1($(header))<0){return $header.remove();}return $header;});return $
function rewriteTopLevel$$1(article,$){// I'm not using context here because
// it's problematic when converting the
// top-level/root node - AP
$=convertNodeTo$$1($('html'),$,'div');$=convertNodeTo$$1($('body'),$,'div');return $;}/* eslint-disable */function absolutize$1($,rootUrl,attr,$content){$('['+attr+']',$content).each(function(_,node){var attrs=getAttrs$1(node);var url=attrs[attr];if(url){var absoluteUrl=URL$1.resolve(rootUrl,url);setAttr$1(node,attr,absoluteUrl);}});}function makeLinksAbsolute$$1($content,$,url){['href','src'].forEach(function(attr){return absolutize$1($,url,attr,$content);});return $content;}function textLength$1(text){return text.trim().replace(/\s+/g,' ').length;}// Determines what percentage of the text
$=convertNodeTo$$1($('html'),$,'div');$=convertNodeTo$$1($('body'),$,'div');return $;}function absolutize$1($,rootUrl,attr,$content){$('['+attr+']',$content).each(function(_,node){var attrs=getAttrs$1(node);var url=attrs[attr];if(url){var absoluteUrl=URL$1.resolve(rootUrl,url);setAttr$1(node,attr,absoluteUrl);}});}function makeLinksAbsolute$$1($content,$,url){['href','src'].forEach(function(attr){return absolutize$1($,url,attr,$content);});return $content;}function textLength$1(text){return text.trim().replace(/\s+/g,' ').length;}// Determines what percentage of the text
// in a node is link text
// Takes a node, returns a float
function linkDensity$1($node){var totalTextLength=textLength$1($node.text());var linkText=$node.find('a').text();var linkLength=textLength$1(linkText);if(totalTextLength>0){return linkLength/totalTextLength;}else if(totalTextLength===0&&linkLength>0){return 1;}return 0;}// Given a node type to search for, and a list of meta tag names to
@ -1447,21 +1452,21 @@ function stripTags$1(text,$){// Wrapping text in html element prevents errors wh
var cleanText=$('<span>'+text+'</span>').text();return cleanText===''?text:cleanText;}function withinComment$$1($node){var parents=$node.parents().toArray();var commentParent=parents.find(function(parent){var attrs=getAttrs$1(parent);var nodeClass=attrs.class,id=attrs.id;var classAndId=nodeClass+' '+id;return classAndId.includes('comment');});return commentParent!==undefined;}// Given a node, determine if it's article-like enough to return
// param: node (a cheerio node)
// return: boolean
function nodeIsSufficient$1($node){return $node.text().trim().length>=100;}function isWordpress$1($){return $(IS_WP_SELECTOR$1).length>0;}function getAttrs$1(node){var attribs=node.attribs,attributes=node.attributes;if(!attribs&&attributes){var attrs=_Reflect$ownKeys$1(attributes).reduce(function(acc,index){var attr=attributes[index];acc[attr.name]=attr.value;return acc;},{});return attrs;}return attribs;}function setAttr$1(node,attr,val){if(node.attribs){node.attribs[attr]=val;}else if(node.attributes){node.setAttribute(attr,val);}return node;}/* eslint-disable */function setAttrs$1(node,attrs){if(node.attribs){node.attribs=attrs;}else if(node.attributes){while(node.attributes.length>0){node.removeAttribute(node.attributes[0].name);}_Reflect$ownKeys$1(attrs).forEach(function(key){node.setAttribute(key,attrs[key]);});}return node;}// DOM manipulation
function nodeIsSufficient$1($node){return $node.text().trim().length>=100;}function isWordpress$1($){return $(IS_WP_SELECTOR$1).length>0;}function getAttrs$1(node){var attribs=node.attribs,attributes=node.attributes;if(!attribs&&attributes){var attrs=_Reflect$ownKeys$1(attributes).reduce(function(acc,index){var attr=attributes[index];if(!attr.name||!attr.value)return acc;acc[attr.name]=attr.value;return acc;},{});return attrs;}return attribs;}function setAttr$1(node,attr,val){if(node.attribs){node.attribs[attr]=val;}else if(node.attributes){node.setAttribute(attr,val);}return node;}function setAttrs$1(node,attrs){if(node.attribs){node.attribs=attrs;}else if(node.attributes){while(node.attributes.length>0){node.removeAttribute(node.attributes[0].name);}_Reflect$ownKeys$1(attrs).forEach(function(key){node.setAttribute(key,attrs[key]);});}return node;}// DOM manipulation
var IS_LINK=new RegExp('https?://','i');var IS_IMAGE=new RegExp('.(png|gif|jpe?g)','i');var TAGS_TO_REMOVE=['script','style','form'].join(',');// Convert all instances of images with potentially
// lazy loaded images into normal images.
// Many sites will have img tags with no source, or an image tag with a src
// attribute that is a placeholder. We need to be able to properly fill in
// the src attribute so the images are no longer lazy loaded.
function convertLazyLoadedImages($){$('img').each(function(_,img){var attrs=getAttrs$1(img);_Reflect$ownKeys$1(attrs).forEach(function(attr){var value=attrs[attr];if(attr!=='src'&&IS_LINK.test(value)&&IS_IMAGE.test(value)){$(img).attr('src',value);}});});return $;}function isComment(index,node){return node.type==='comment';}function cleanComments($){$('*').first().contents().filter(isComment).remove();return $;}function clean($){$(TAGS_TO_REMOVE).remove();$=cleanComments($);return $;}var Resource={// Create a Resource.
function convertLazyLoadedImages($){$('img').each(function(_,img){var attrs=getAttrs$1(img);_Reflect$ownKeys$1(attrs).forEach(function(attr){var value=attrs[attr];if(attr!=='src'&&IS_LINK.test(value)&&IS_IMAGE.test(value)){$(img).attr('src',value);}});});return $;}function isComment(index,node){return node.type==='comment';}function cleanComments($){$.root().find('*').contents().filter(isComment).remove();return $;}function clean($){$(TAGS_TO_REMOVE).remove();$=cleanComments($);return $;}var Resource={// Create a Resource.
//
// :param url: The URL for the document we should retrieve.
// :param response: If set, use as the response rather than
// attempting to fetch it ourselves. Expects a
// string.
create:function create(url,preparedResponse,parsedUrl){var _this=this;return _asyncToGenerator(_regeneratorRuntime.mark(function _callee(){var result,validResponse;return _regeneratorRuntime.wrap(function _callee$(_context){while(1){switch(_context.prev=_context.next){case 0:result=void 0;if(!preparedResponse){_context.next=6;break;}validResponse={statusMessage:'OK',statusCode:200,headers:{'content-type':'text/html','content-length':500}};result={body:preparedResponse,response:validResponse};_context.next=9;break;case 6:_context.next=8;return fetchResource$1(url,parsedUrl);case 8:result=_context.sent;case 9:if(!result.error){_context.next=11;break;}return _context.abrupt('return',result);case 11:return _context.abrupt('return',_this.generateDoc(result));case 12:case'end':return _context.stop();}}},_callee,_this);}))();},generateDoc:function generateDoc(_ref){var content=_ref.body,response=_ref.response;var contentType=response.headers['content-type'];// TODO: Implement is_text function from
create:function create(url,preparedResponse,parsedUrl){var _this=this;return _asyncToGenerator(_regeneratorRuntime.mark(function _callee(){var result,validResponse;return _regeneratorRuntime.wrap(function _callee$(_context){while(1){switch(_context.prev=_context.next){case 0:result=void 0;if(!preparedResponse){_context.next=6;break;}validResponse={statusMessage:'OK',statusCode:200,headers:{'content-type':'text/html','content-length':500}};result={body:preparedResponse,response:validResponse};_context.next=9;break;case 6:_context.next=8;return fetchResource$1(url,parsedUrl);case 8:result=_context.sent;case 9:if(!result.error){_context.next=12;break;}result.failed=true;return _context.abrupt('return',result);case 12:return _context.abrupt('return',_this.generateDoc(result));case 13:case'end':return _context.stop();}}},_callee,_this);}))();},generateDoc:function generateDoc(_ref){var content=_ref.body,response=_ref.response;var contentType=response.headers['content-type'];// TODO: Implement is_text function from
// https://github.com/ReadabilityHoldings/readability/blob/8dc89613241d04741ebd42fa9fa7df1b1d746303/readability/utils/text.py#L57
if(!contentType.includes('html')&&!contentType.includes('text')){throw new Error('Content does not appear to be text.');}var $=cheerio$1.load(content,{normalizeWhitespace:true});if($('*').first().children().length===0){throw new Error('No children, likely a bad parse.');}$=normalizeMetaTags($);$=convertLazyLoadedImages($);$=clean($);return $;}};var merge=function merge(extractor,domains){return domains.reduce(function(acc,domain){acc[domain]=extractor;return acc;},{});};function mergeSupportedDomains(extractor){return extractor.supportedDomains?merge(extractor,[extractor.domain].concat(_toConsumableArray$1(extractor.supportedDomains))):merge(extractor,[extractor.domain]);}var BloggerExtractor={domain:'blogspot.com',content:{// Blogger is insane and does not load its content
if(!contentType.includes('html')&&!contentType.includes('text')){throw new Error('Content does not appear to be text.');}var $=cheerio$1.load(content,{normalizeWhitespace:true});if($.root().children().length===0){throw new Error('No children, likely a bad parse.');}$=normalizeMetaTags($);$=convertLazyLoadedImages($);$=clean($);return $;}};var merge=function merge(extractor,domains){return domains.reduce(function(acc,domain){acc[domain]=extractor;return acc;},{});};function mergeSupportedDomains(extractor){return extractor.supportedDomains?merge(extractor,[extractor.domain].concat(_toConsumableArray$1(extractor.supportedDomains))):merge(extractor,[extractor.domain]);}var BloggerExtractor={domain:'blogspot.com',content:{// Blogger is insane and does not load its content
// initially in the page, but it's all there
// in noscript
selectors:['.post-content noscript'],// Selectors to remove from the extracted content
@ -1476,7 +1481,7 @@ clean:['.ad','.single-related-story'],// Object of tranformations to make on mat
// the transformation.
transforms:{// Convert h1s to h2s
h1:'h2',// Convert lazy-loaded noscript images to figures
noscript:function noscript($node,$){if($.browser){var $children=$($node.text());if($children.length===1&&$children.get(0)!==undefined&&$children.get(0).tagName.toLowerCase()==='img'){return'figure';}}else{var _$children=$node.children();if(_$children.length===1&&_$children.get(0).tagName==='img'){return'figure';}}return null;}}},title:{selectors:['h1.lede-feature-title','h1.headline-primary','h1']},author:{selectors:['.by-authors','.lede-feature-author']},dek:{selectors:['.lede-feature-teaser']},date_published:{selectors:[['time.article-timestamp[datetime]','datetime'],'time.article-timestamp']}};var WikipediaExtractor={domain:'wikipedia.org',content:{selectors:['#mw-content-text'],defaultCleaner:false,// transform top infobox to an image with caption
noscript:function noscript($node,$){var $children=$.browser?$($node.text()):$node.children();if($children.length===1&&$children.get(0)!==undefined&&$children.get(0).tagName.toLowerCase()==='img'){return'figure';}return null;}}},title:{selectors:['h1.lede-feature-title','h1.headline-primary','h1']},author:{selectors:['.by-authors','.lede-feature-author']},dek:{selectors:['.lede-feature-teaser']},date_published:{selectors:[['time.article-timestamp[datetime]','datetime'],'time.article-timestamp']}};var WikipediaExtractor={domain:'wikipedia.org',content:{selectors:['#mw-content-text'],defaultCleaner:false,// transform top infobox to an image with caption
transforms:{'.infobox img':function infoboxImg($node){var $parent=$node.parents('.infobox');// Only prepend the first image in .infobox
if($parent.children('img').length===0){$parent.prepend($node);}},'.infobox caption':'figcaption','.infobox':'figure'},// Selectors to remove from the extracted content
clean:['.mw-editsection','figure tr, figure td, figure tbody','#toc','.navbox']},author:'Wikipedia Contributors',title:{selectors:['h2.title']},date_published:{selectors:['#footer-info-lastmod']}};var TwitterExtractor={domain:'twitter.com',content:{transforms:{// We're transforming essentially the whole page here.
@ -1485,7 +1490,7 @@ clean:['.mw-editsection','figure tr, figure td, figure tbody','#toc','.navbox']}
// it to fit our needs before we clean it up.
'.permalink[role=main]':function permalinkRoleMain($node,$){var tweets=$node.find('.tweet');var $tweetContainer=$('<div id="TWEETS_GO_HERE"></div>');$tweetContainer.append(tweets);$node.replaceWith($tweetContainer);},// Twitter wraps @ with s, which
// renders as a strikethrough
s:'span'},selectors:['.permalink[role=main]'],defaultCleaner:false,clean:['.stream-item-footer','button','.tweet-details-fixer']},author:{selectors:['.tweet.permalink-tweet .username']},date_published:{selectors:[['.permalink-tweet ._timestamp[data-time-ms]','data-time-ms']]}};var NYTimesExtractor={domain:'www.nytimes.com',title:{selectors:['.g-headline','h1.headline']},author:{selectors:[['meta[name="author"]','value'],'.g-byline','.byline']},content:{selectors:['div.g-blocks','article#story'],defaultCleaner:false,transforms:{'img.g-lazy':function imgGLazy($node){var src=$node.attr('src');// const widths = $node.attr('data-widths')
s:'span'},selectors:['.permalink[role=main]'],defaultCleaner:false,clean:['.stream-item-footer','button','.tweet-details-fixer']},author:{selectors:['.tweet.permalink-tweet .username']},date_published:{selectors:[['.permalink-tweet ._timestamp[data-time-ms]','data-time-ms']]}};var NYTimesExtractor={domain:'www.nytimes.com',title:{selectors:['.g-headline','h1.headline']},author:{selectors:[['meta[name="author"]','value'],'.g-byline','.byline']},content:{selectors:['div.g-blocks','article#story'],transforms:{'img.g-lazy':function imgGLazy($node){var src=$node.attr('src');// const widths = $node.attr('data-widths')
// .slice(1)
// .slice(0, -1)
// .split(',');
@ -1494,14 +1499,14 @@ s:'span'},selectors:['.permalink[role=main]'],defaultCleaner:false,clean:['.stre
// } else {
// width = '900';
// }
var width=640;src=src.replace('{{size}}',width);$node.attr('src',src);}},clean:['.ad','header#story-header','.story-body-1 .lede.video','.visually-hidden','#newsletter-promo','.promo','.comments-button','.hidden']},date_published:null,lead_image_url:null,dek:null,next_page_url:null,excerpt:null};// Rename CustomExtractor
var width=640;src=src.replace('{{size}}',width);$node.attr('src',src);}},clean:['.ad','header#story-header','.story-body-1 .lede.video','.visually-hidden','#newsletter-promo','.promo','.comments-button','.hidden','.comments','.supplemental','.nocontent','.story-footer-links']},date_published:{selectors:[['meta[name="article:published"]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:null,next_page_url:null,excerpt:null};// Rename CustomExtractor
// to fit your publication
var TheAtlanticExtractor={domain:'www.theatlantic.com',title:{selectors:['h1.hed']},author:{selectors:['article#article .article-cover-extra .metadata .byline a']},content:{selectors:['.article-body'],// Is there anything in the content you selected that needs transformed
var TheAtlanticExtractor={domain:'www.theatlantic.com',title:{selectors:['h1.hed']},author:{selectors:['article#article .article-cover-extra .metadata .byline a']},content:{selectors:[['.article-cover figure.lead-img','.article-body'],'.article-body'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:[],// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]},date_published:{selectors:[['time[itemProp="datePublished"]','datetime']]},lead_image_url:null,dek:null,next_page_url:null,excerpt:null};// Rename CustomExtractor
clean:['.partner-box']},date_published:{selectors:[['time[itemProp="datePublished"]','datetime']]},lead_image_url:null,next_page_url:null,excerpt:null};// Rename CustomExtractor
// to fit your publication
// (e.g., NYTimesExtractor)
var NewYorkerExtractor={domain:'www.newyorker.com',title:{selectors:['h1.title']},author:{selectors:['.contributors']},content:{selectors:['div#articleBody','div.articleBody'],// Is there anything in the content you selected that needs transformed
@ -1509,7 +1514,7 @@ var NewYorkerExtractor={domain:'www.newyorker.com',title:{selectors:['h1.title']
transforms:[],// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]},date_published:{selectors:[['meta[name="article:published_time"]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[['meta[name="og:description"]','value']]},next_page_url:null,excerpt:null};// Rename CustomExtractor
clean:[]},date_published:{selectors:[['meta[name="article:published_time"]','value'],['time[itemProp="datePublished"]','content']],timezone:'America/New_York'},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:['.dek','h2.dek']},next_page_url:null,excerpt:null};// Rename CustomExtractor
// to fit your publication
// (e.g., NYTimesExtractor)
var WiredExtractor={domain:'www.wired.com',title:{selectors:['h1.post-title']},author:{selectors:['a[rel="author"]']},content:{selectors:['article.content'],// Is there anything in the content you selected that needs transformed
@ -1517,7 +1522,7 @@ var WiredExtractor={domain:'www.wired.com',title:{selectors:['h1.post-title']},a
transforms:[],// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:['.visually-hidden']},date_published:{selectors:[['meta[itemprop="datePublished"]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[['meta[name="og:description"]','value']]},next_page_url:null,excerpt:null};// Rename CustomExtractor
clean:['.visually-hidden']},date_published:{selectors:[['meta[itemprop="datePublished"]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[]},next_page_url:null,excerpt:null};// Rename CustomExtractor
// to fit your publication
// (e.g., NYTimesExtractor)
var MSNExtractor={domain:'www.msn.com',title:{selectors:['h1']},author:{selectors:['span.authorname-txt']},content:{selectors:['div.richtext'],// Is there anything in the content you selected that needs transformed
@ -1525,7 +1530,7 @@ var MSNExtractor={domain:'www.msn.com',title:{selectors:['h1']},author:{selector
transforms:[],// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:['span.caption']},date_published:{selectors:['span.time']},lead_image_url:{selectors:[]},dek:{selectors:[['meta[name="description"]','value']]},next_page_url:null,excerpt:null};// Rename CustomExtractor
clean:['span.caption']},date_published:{selectors:['span.time']},lead_image_url:{selectors:[]},dek:{selectors:[]},next_page_url:null,excerpt:null};// Rename CustomExtractor
// to fit your publication
// (e.g., NYTimesExtractor)
var YahooExtractor={domain:'www.yahoo.com',title:{selectors:['header.canvas-header']},author:{selectors:['span.provider-name']},content:{selectors:[// enter content selectors
@ -1534,15 +1539,16 @@ var YahooExtractor={domain:'www.yahoo.com',title:{selectors:['header.canvas-head
transforms:[],// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:['.figure-caption']},date_published:{selectors:[['time.date[datetime]','datetime']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[['meta[name="og:description"]','value']]},next_page_url:null,excerpt:null};// Rename CustomExtractor
clean:['.figure-caption']},date_published:{selectors:[['time.date[datetime]','datetime']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[// enter dek selectors
]},next_page_url:null,excerpt:null};// Rename CustomExtractor
// to fit your publication
// (e.g., NYTimesExtractor)
var BuzzfeedExtractor={domain:'www.buzzfeed.com',title:{selectors:['h1[id="post-title"]']},author:{selectors:['a[data-action="user/username"]','byline__author']},content:{selectors:['#buzz_sub_buzz'],defaultCleaner:false,// Is there anything in the content you selected that needs transformed
var BuzzfeedExtractor={domain:'www.buzzfeed.com',title:{selectors:['h1[id="post-title"]']},author:{selectors:['a[data-action="user/username"]','byline__author']},content:{selectors:[['.longform_custom_header_media','#buzz_sub_buzz'],'#buzz_sub_buzz'],defaultCleaner:false,// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{h2:'b'},// Is there anything that is in the result that shouldn't be?
transforms:{h2:'b','div.longform_custom_header_media':function divLongform_custom_header_media($node){if($node.has('img')&&$node.has('.longform_header_image_source')){return'figure';}return null;},'figure.longform_custom_header_media .longform_header_image_source':'figcaption'},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:['.instapaper_ignore','.suplist_list_hide .buzz_superlist_item .buzz_superlist_number_inline','.share-box']},date_published:{selectors:['.buzz-datetime']},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[['meta[name="description"]','value']]},next_page_url:null,excerpt:null};// Rename CustomExtractor
clean:['.instapaper_ignore','.suplist_list_hide .buzz_superlist_item .buzz_superlist_number_inline','.share-box','.print']},date_published:{selectors:['.buzz-datetime']},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[]},next_page_url:null,excerpt:null};// Rename CustomExtractor
// to fit your publication
// (e.g., NYTimesExtractor)
var WikiaExtractor={domain:'fandom.wikia.com',title:{selectors:['h1.entry-title']},author:{selectors:['.author vcard','.fn']},content:{selectors:['.grid-content','.entry-content'],// Is there anything in the content you selected that needs transformed
@ -1550,7 +1556,7 @@ var WikiaExtractor={domain:'fandom.wikia.com',title:{selectors:['h1.entry-title'
transforms:[],// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]},date_published:{selectors:[['meta[name="article:published_time"]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[['meta[name="og:description"]','value']]},next_page_url:null,excerpt:null};// Rename CustomExtractor
clean:[]},date_published:{selectors:[['meta[name="article:published_time"]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[]},next_page_url:null,excerpt:null};// Rename CustomExtractor
// to fit your publication
// (e.g., NYTimesExtractor)
var LittleThingsExtractor={domain:'www.littlethings.com',title:{selectors:['h1.post-title']},author:{selectors:[['meta[name="author"]','value']]},content:{selectors:[// enter content selectors
@ -1570,12 +1576,12 @@ transforms:[],// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:['figcaption']},date_published:{selectors:[['.story-main-content .timestamp time[datetime]','datetime']]},lead_image_url:{selectors:[// enter lead_image_url selectors
['meta[name="og:image"]','value']]},dek:{selectors:[['meta[name="description"]','value']]},next_page_url:null,excerpt:null};var DeadspinExtractor={domain:'deadspin.com',supportedDomains:['jezebel.com','lifehacker.com','kotaku.com','gizmodo.com','jalopnik.com','kinja.com'],title:{selectors:['h1.headline']},author:{selectors:['.author']},content:{selectors:['.post-content','.entry-content'],// Is there anything in the content you selected that needs transformed
['meta[name="og:image"]','value']]},dek:{selectors:[]},next_page_url:null,excerpt:null};var DeadspinExtractor={domain:'deadspin.com',supportedDomains:['jezebel.com','lifehacker.com','kotaku.com','gizmodo.com','jalopnik.com','kinja.com'],title:{selectors:['h1.headline']},author:{selectors:['.author']},content:{selectors:['.post-content','.entry-content'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{'iframe.lazyload[data-recommend-id^="youtube://"]':function iframeLazyloadDataRecommendIdYoutube($node){var youtubeId=$node.attr('id').split('youtube-')[1];$node.attr('src','https://www.youtube.com/embed/'+youtubeId);}},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]},date_published:{selectors:[['time.updated[datetime]','datetime']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[// enter selectors
clean:['.magnifier','.lightbox']},date_published:{selectors:[['time.updated[datetime]','datetime']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[// enter selectors
]},next_page_url:{selectors:[// enter selectors
]},excerpt:{selectors:[// enter selectors
]}};// Rename CustomExtractor
@ -1586,7 +1592,7 @@ var BroadwayWorldExtractor={domain:'www.broadwayworld.com',title:{selectors:['h1
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]},date_published:{selectors:[['meta[itemprop=datePublished]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[['meta[name="og:description"]','value']]},next_page_url:{selectors:[// enter selectors
clean:[]},date_published:{selectors:[['meta[itemprop=datePublished]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[]},next_page_url:{selectors:[// enter selectors
]},excerpt:{selectors:[// enter selectors
]}};// Rename CustomExtractor
// to fit your publication
@ -1596,7 +1602,7 @@ var ApartmentTherapyExtractor={domain:'www.apartmenttherapy.com',title:{selector
transforms:{'div[data-render-react-id="images/LazyPicture"]':function divDataRenderReactIdImagesLazyPicture($node,$){var data=JSON.parse($node.attr('data-props'));var src=data.sources[0].src;var $img=$('<img />').attr('src',src);$node.replaceWith($img);}},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]},date_published:{selectors:[['.PostByline__timestamp[datetime]','datetime']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[['meta[name=description]','value']]},next_page_url:{selectors:[// enter selectors
clean:[]},date_published:{selectors:[['.PostByline__timestamp[datetime]','datetime']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[]},next_page_url:{selectors:[// enter selectors
]},excerpt:{selectors:[// enter selectors
]}};var MediumExtractor={domain:'medium.com',supportedDomains:['trackchanges.postlight.com'],title:{selectors:['h1']},author:{selectors:[['meta[name="author"]','value']]},content:{selectors:['.section-content'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
@ -1608,7 +1614,60 @@ $node.attr('src','https://www.youtube.com/embed/'+youtubeId);var $parent=$node.p
clean:[]},date_published:{selectors:[['time[datetime]','datetime']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[// enter selectors
]},next_page_url:{selectors:[// enter selectors
]},excerpt:{selectors:[// enter selectors
]}};var CustomExtractors=_Object$freeze({BloggerExtractor:BloggerExtractor,NYMagExtractor:NYMagExtractor,WikipediaExtractor:WikipediaExtractor,TwitterExtractor:TwitterExtractor,NYTimesExtractor:NYTimesExtractor,TheAtlanticExtractor:TheAtlanticExtractor,NewYorkerExtractor:NewYorkerExtractor,WiredExtractor:WiredExtractor,MSNExtractor:MSNExtractor,YahooExtractor:YahooExtractor,BuzzfeedExtractor:BuzzfeedExtractor,WikiaExtractor:WikiaExtractor,LittleThingsExtractor:LittleThingsExtractor,PoliticoExtractor:PoliticoExtractor,DeadspinExtractor:DeadspinExtractor,BroadwayWorldExtractor:BroadwayWorldExtractor,ApartmentTherapyExtractor:ApartmentTherapyExtractor,MediumExtractor:MediumExtractor});var Extractors=_Object$keys(CustomExtractors).reduce(function(acc,key){var extractor=CustomExtractors[key];return _extends$1({},acc,mergeSupportedDomains(extractor));},{});// CLEAN AUTHOR CONSTANTS
]}};var WwwTmzComExtractor={domain:'www.tmz.com',title:{selectors:['.post-title-breadcrumb','h1','.headline']},author:'TMZ STAFF',date_published:{selectors:['.article-posted-date'],timezone:'America/Los_Angeles'},dek:{selectors:[// enter selectors
]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:['.article-content','.all-post-body'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:['.lightbox-link']}};var WwwWashingtonpostComExtractor={domain:'www.washingtonpost.com',title:{selectors:['h1','#topper-headline-wrapper']},author:{selectors:['.pb-byline']},date_published:{selectors:[['.pb-timestamp[itemprop="datePublished"]','content']]},dek:{selectors:[]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:['.article-body'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{'div.inline-content':function divInlineContent($node){if($node.has('img,iframe,video').length>0){return'figure';}$node.remove();return null;},'.pb-caption':'figcaption'},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:['.interstitial-link','.newsletter-inline-unit']}};var WwwHuffingtonpostComExtractor={domain:'www.huffingtonpost.com',title:{selectors:['h1.headline__title']},author:{selectors:['span.author-card__details__name']},date_published:{selectors:[['meta[name="article:modified_time"]','value'],['meta[name="article:published_time"]','value']]},dek:{selectors:['h2.headline__subtitle']},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:['div.entry__body'],defaultCleaner:false,// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{// 'div.top-media': ($node) => {
// const $figure = $node.children('figure');
// $node.replaceWith($figure);
// },
},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:['.pull-quote','.tag-cloud','.embed-asset','.below-entry','.entry-corrections','#suggested-story']}};var NewrepublicComExtractor={domain:'newrepublic.com',title:{selectors:['h1.article-headline','.minutes-primary h1.minute-title']},author:{selectors:['div.author-list','.minutes-primary h3.minute-byline']},date_published:{selectors:[['meta[name="article:published_time"]','value']],timezone:'America/New_York'},dek:{selectors:['h2.article-subhead']},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:['div.content-body','.minutes-primary div.content-body'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:['aside']}};var MoneyCnnComExtractor={domain:'money.cnn.com',title:{selectors:['.article-title']},author:{selectors:['.byline a']},date_published:{selectors:[['meta[name="date"]','value']],timezone:'GMT'},dek:{selectors:['#storytext h2']},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:['#storytext'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:['.inStoryHeading']}};var WwwThevergeComExtractor={domain:'www.theverge.com',title:{selectors:['h1']},author:{selectors:[['meta[name="author"]','value']]},date_published:{selectors:[['meta[name="article:published_time"]','value']]},dek:{selectors:['h2.p-dek']},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:[// feature template multi-match
['.c-entry-hero .e-image','.c-entry-intro','.c-entry-content'],// regular post multi-match
['.e-image--hero','.c-entry-content'],// feature template fallback
'.l-wrapper .l-feature',// regular post fallback
'div.c-entry-content'],// Transform lazy-loaded images
transforms:{noscript:function noscript($node){var $children=$node.children();if($children.length===1&&$children.get(0).tagName==='img'){return'span';}return null;}},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:['.aside','img.c-dynamic-image']}};var WwwCnnComExtractor={domain:'www.cnn.com',title:{selectors:['h1.pg-headline','h1']},author:{selectors:['.metadata__byline__author']},date_published:{selectors:[['meta[name="pubdate"]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:[// a more specific selector to grab the lead image and the body
['.media__video--thumbnail','.zn-body-text'],// a fallback for the above
'.zn-body-text','div[itemprop="articleBody"]'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{'.zn-body__paragraph, .el__leafmedia--sourced-paragraph':function znBody__paragraphEl__leafmediaSourcedParagraph($node){var $text=$node.html();if($text){return'p';}return null;},// this transform cleans the short, all-link sections linking
// to related content but not marked as such in any way.
'.zn-body__paragraph':function znBody__paragraph($node){if($node.has('a')){if($node.text().trim()===$node.find('a').text().trim()){$node.remove();}}},'.media__video--thumbnail':'figure'},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]}};var WwwAolComExtractor={domain:'www.aol.com',title:{selectors:['h1.p-article__title']},author:{selectors:[['meta[name="author"]','value']]},date_published:{selectors:['.p-article__byline__date'],timezone:'America/New_York'},dek:{selectors:[// enter selectors
]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:['.article-content'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]}};
// Frozen registry of every site-specific extractor, keyed by its variable name.
var CustomExtractors=_Object$freeze({BloggerExtractor:BloggerExtractor,NYMagExtractor:NYMagExtractor,WikipediaExtractor:WikipediaExtractor,TwitterExtractor:TwitterExtractor,NYTimesExtractor:NYTimesExtractor,TheAtlanticExtractor:TheAtlanticExtractor,NewYorkerExtractor:NewYorkerExtractor,WiredExtractor:WiredExtractor,MSNExtractor:MSNExtractor,YahooExtractor:YahooExtractor,BuzzfeedExtractor:BuzzfeedExtractor,WikiaExtractor:WikiaExtractor,LittleThingsExtractor:LittleThingsExtractor,PoliticoExtractor:PoliticoExtractor,DeadspinExtractor:DeadspinExtractor,BroadwayWorldExtractor:BroadwayWorldExtractor,ApartmentTherapyExtractor:ApartmentTherapyExtractor,MediumExtractor:MediumExtractor,WwwTmzComExtractor:WwwTmzComExtractor,WwwWashingtonpostComExtractor:WwwWashingtonpostComExtractor,WwwHuffingtonpostComExtractor:WwwHuffingtonpostComExtractor,NewrepublicComExtractor:NewrepublicComExtractor,MoneyCnnComExtractor:MoneyCnnComExtractor,WwwThevergeComExtractor:WwwThevergeComExtractor,WwwCnnComExtractor:WwwCnnComExtractor,WwwAolComExtractor:WwwAolComExtractor});
// Domain -> extractor lookup table. Each extractor contributes the entries
// produced by mergeSupportedDomains; when two extractors yield the same
// domain key, the one merged later wins.
var Extractors=_Object$keys(CustomExtractors).reduce(function(acc,key){var extractor=CustomExtractors[key];return _extends$1({},acc,mergeSupportedDomains(extractor));},{});// CLEAN AUTHOR CONSTANTS
// Matches an optional "posted "/"written " prefix followed by "by" and an
// optional colon at the start of the string (case-insensitive). Capture
// group 2 holds everything after that prefix.
var CLEAN_AUTHOR_RE=/^\s*(posted |written )?by\s*:?\s*(.*)/i;// author = re.sub(r'^\s*(posted |written )?by\s*:?\s*(.*)(?i)',
// CLEAN DEK CONSTANTS
// Matches "http://" or "https://" anywhere in a string (case-insensitive).
var TEXT_LINK_RE=new RegExp('http(s)?://','i');// An ordered list of meta tag names that denote likely article deks.
@ -1626,24 +1685,26 @@ var TEXT_LINK_RE=new RegExp('http(s)?://','i');// An ordered list of meta tag na
// Should be more restrictive than not, as a failed dek can be pretty
// detrimental to the aesthetics of an article.
// CLEAN DATE PUBLISHED CONSTANTS
var MS_DATE_STRING=/^\d{13}$/i;var SEC_DATE_STRING=/^\d{10}$/i;var CLEAN_DATE_STRING_RE=/^\s*published\s*:?\s*(.*)/i;var TIME_MERIDIAN_SPACE_RE=/(.*\d)(am|pm)(.*)/i;var TIME_MERIDIAN_DOTS_RE=/\.m\./i;var months=['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec'];var allMonths=months.join('|');var timestamp1='[0-9]{1,2}:[0-9]{2,2}( ?[ap].?m.?)?';var timestamp2='[0-9]{1,2}[/-][0-9]{1,2}[/-][0-9]{2,4}';var SPLIT_DATE_STRING=new RegExp('('+timestamp1+')|('+timestamp2+')|([0-9]{1,4})|('+allMonths+')','ig');// CLEAN TITLE CONSTANTS
// Whole-string epoch timestamps: exactly 13 digits, or exactly 10 digits.
var MS_DATE_STRING=/^\d{13}$/i;
var SEC_DATE_STRING=/^\d{10}$/i;
// Leading "published" label with an optional colon; group 1 is the remainder.
var CLEAN_DATE_STRING_RE=/^\s*published\s*:?\s*(.*)/i;
// A digit immediately followed by am/pm, captured so a space can be inserted.
var TIME_MERIDIAN_SPACE_RE=/(.*\d)(am|pm)(.*)/i;
// The ".m." part of a dotted meridian such as "p.m.".
var TIME_MERIDIAN_DOTS_RE=/\.m\./i;
var months=['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec'];
var allMonths=months.join('|');
// Clock time with an optional meridian, e.g. "8:57" or "10:30p.m."
var timestamp1='[0-9]{1,2}:[0-9]{2,2}( ?[ap].?m.?)?';
// Numeric date separated by slashes or dashes, e.g. "12/25/2016"
var timestamp2='[0-9]{1,2}[/-][0-9]{1,2}[/-][0-9]{2,4}';
// Trailing numeric offset, e.g. "-500"
var timestamp3='-[0-9]{3,4}$';
// Every date-like token, tried in this order: clock time, numeric date,
// trailing offset, bare 1-4 digit number, month abbreviation. Each
// alternative is wrapped in its own capture group.
var SPLIT_DATE_STRING=new RegExp([timestamp1,timestamp2,timestamp3,'[0-9]{1,4}',allMonths].map(function(pattern){return'('+pattern+')';}).join('|'),'ig');// 2016-11-22T08:57-500
// Check if datetime string has an offset at the end
var TIME_WITH_OFFSET_RE=/-\d{3,4}$/;// CLEAN TITLE CONSTANTS
// A regular expression that will match separating characters on a
// title, that usually denote breadcrumbs or something similar.
// NOTE(review): this regex is global and is used with .test() elsewhere in
// this file; .test() on a global regex advances lastIndex between calls, so
// confirm that back-to-back calls cannot start matching mid-string.
var TITLE_SPLITTERS_RE=/(: | - | \| )/g;
// Common domain endings at the end of a string. The dots are escaped so that
// only a literal "." matches: the unescaped pattern '.com$' also matched
// strings such as "telecom", stripping "ecom" from them.
var DOMAIN_ENDINGS_RE=new RegExp('\\.com$|\\.net$|\\.org$|\\.co\\.uk$','g');// Take an author string (like 'By David Smith ') and clean it to
// just the name(s): 'David Smith'.
function cleanAuthor(author){return author.replace(CLEAN_AUTHOR_RE,'$2').trim();}function clean$1(leadImageUrl){leadImageUrl=leadImageUrl.trim();if(validUrl$1.isWebUri(leadImageUrl)){return leadImageUrl;}return null;}// Take a dek HTML fragment, and return the cleaned version of it.
function cleanAuthor(author){
  // Drop the byline prefix first, then trim and normalize the spacing.
  var withoutByline=author.replace(CLEAN_AUTHOR_RE,'$2');
  return normalizeSpaces$1(withoutByline.trim());
}
// Trim a lead image URL candidate and return it only when validUrl$1 accepts
// it as a web URI; otherwise return null.
function clean$1(leadImageUrl){
  var candidate=leadImageUrl.trim();
  return validUrl$1.isWebUri(candidate)?candidate:null;
}// Take a dek HTML fragment, and return the cleaned version of it.
// Return None if the dek wasn't good enough.
function cleanDek(dek,_ref){var $=_ref.$,excerpt=_ref.excerpt;// Sanity check that we didn't get too short or long of a dek.
if(dek.length>1000||dek.length<5)return null;// Check that dek isn't the same as excerpt
if(excerpt&&excerptContent$1(excerpt,10)===excerptContent$1(dek,10))return null;var dekText=stripTags$1(dek,$);// Plain text links shouldn't exist in the dek. If we have some, it's
// not a good dek - bail.
if(TEXT_LINK_RE.test(dekText))return null;return dekText.trim();}// Is there a compelling reason to use moment here?
if(TEXT_LINK_RE.test(dekText))return null;return normalizeSpaces$1(dekText.trim());}// Is there a compelling reason to use moment here?
// Mostly only being used for the isValid() method,
// but could just check for 'Invalid Date' string.
function cleanDateString(dateString){return(dateString.match(SPLIT_DATE_STRING)||[]).join(' ').replace(TIME_MERIDIAN_DOTS_RE,'m').replace(TIME_MERIDIAN_SPACE_RE,'$1 $2 $3').replace(CLEAN_DATE_STRING_RE,'$1').trim();}// Take a date published string, and hopefully return a date out of
function cleanDateString(dateString){
  // Keep only the date-like tokens, then tidy up meridians and labels.
  var tokens=dateString.match(SPLIT_DATE_STRING)||[];
  var joined=tokens.join(' ');
  var withoutDots=joined.replace(TIME_MERIDIAN_DOTS_RE,'m');
  var spacedMeridian=withoutDots.replace(TIME_MERIDIAN_SPACE_RE,'$1 $2 $3');
  return spacedMeridian.replace(CLEAN_DATE_STRING_RE,'$1').trim();
}
// Build a moment from a date string. Strings ending in a numeric offset go
// through the native Date constructor; everything else is parsed with the
// format guessed by parseFormat, in `timezone` when one is given.
function createDate(dateString,timezone){
  if(TIME_WITH_OFFSET_RE.test(dateString)){
    return moment(new Date(dateString));
  }
  var format=parseFormat(dateString);
  if(timezone){
    return moment.tz(dateString,format,timezone);
  }
  return moment(dateString,format);
}// Take a date published string, and hopefully return a date out of
// it. Return none if we fail.
/**
 * Turn a published-date string into an ISO-8601 string.
 *
 * @param {string} dateString - Raw date text, or a numeric epoch timestamp
 *   given as a string.
 * @param {Object} [options] - Optional second argument (read via `arguments`
 *   so the declared arity stays at one).
 * @param {string} [options.timezone] - Forwarded to createDate.
 * @returns {?string} ISO string, or null when no valid date could be built.
 */
function cleanDatePublished(dateString) {
  var _ref = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
  var timezone = _ref.timezone;

  // If string is in milliseconds, convert to int and return
  if (MS_DATE_STRING.test(dateString)) {
    return new Date(parseInt(dateString, 10)).toISOString();
  }

  // If string is in seconds, scale to milliseconds first: `new Date(number)`
  // always interprets its argument as milliseconds since the epoch.
  // NOTE(review): assumes SEC_DATE_STRING matches epoch seconds - confirm
  // against its definition.
  if (SEC_DATE_STRING.test(dateString)) {
    return new Date(parseInt(dateString, 10) * 1000).toISOString();
  }

  var date = createDate(dateString, timezone);

  // Second chance: retry on the cleaned-up string.
  if (!date.isValid()) {
    dateString = cleanDateString(dateString);
    date = createDate(dateString, timezone);
  }

  return date.isValid() ? date.toISOString() : null;
}
// Clean our article content, returning a new, cleaned node.
function extractCleanNode(article,_ref){var $=_ref.$,_ref$cleanConditional=_ref.cleanConditionally,cleanConditionally=_ref$cleanConditional===undefined?true:_ref$cleanConditional,_ref$title=_ref.title,title=_ref$title===undefined?'':_ref$title,_ref$url=_ref.url,url=_ref$url===undefined?'':_ref$url,_ref$defaultCleaner=_ref.defaultCleaner,defaultCleaner=_ref$defaultCleaner===undefined?true:_ref$defaultCleaner;// Rewrite the tag name to div if it's a top level node like body or
// html to avoid later complications with multiple body tags.
rewriteTopLevel$$1(article,$);// Drop small images and spacer images
@ -1671,7 +1732,7 @@ if(TITLE_SPLITTERS_RE.test(title)){title=resolveSplitTitle(title,url);}// Final
// if (title.length > 150 || title.length < 15) {
if(title.length>150){// If we did, return h1 from the document if it exists
var h1=$('h1');if(h1.length===1){title=h1.text();}}// strip any html tags in the title text
return stripTags$1(title,$).trim();}function extractBreadcrumbTitle(splitTitle,text){// This must be a very breadcrumbed title, like:
return normalizeSpaces$1(stripTags$1(title,$).trim());}function extractBreadcrumbTitle(splitTitle,text){// This must be a very breadcrumbed title, like:
// The Best Gadgets on Earth : Bits : Blogs : NYTimes.com
// NYTimes - Blogs - Bits - The Best Gadgets on Earth
if(splitTitle.length>=6){var _ret=function(){// Look to see if we can find a breadcrumb splitter that happens
@ -1727,11 +1788,11 @@ if(opts.stripUnlikelyCandidates){$=stripUnlikelyCandidates$1($);}$=convertToPara
//
// cleanConditionally: Clean the node to return of some
// superfluous content. Things like forms, ads, etc.
extract:function extract(_ref,opts){var $=_ref.$,html=_ref.html,title=_ref.title,url=_ref.url,cheerio$$1=_ref.cheerio;opts=_extends$1({},this.defaultOpts,opts);$=$||cheerio$$1.load(html);// Cascade through our extraction-specific opts in an ordered fashion,
extract:function extract(_ref,opts){var $=_ref.$,html=_ref.html,title=_ref.title,url=_ref.url;opts=_extends$1({},this.defaultOpts,opts);$=$||cheerio$1.load(html);// Cascade through our extraction-specific opts in an ordered fashion,
// turning them off as we try to extract content.
var node=this.getContentNode($,title,url,opts);if(nodeIsSufficient$1(node)){return this.cleanAndReturnNode(node,$);}// We didn't succeed on first pass, one by one disable our
// extraction opts and try again.
var _iteratorNormalCompletion=true;var _didIteratorError=false;var _iteratorError=undefined;try{for(var _iterator=_getIterator$1(_Reflect$ownKeys$1(opts).filter(function(k){return opts[k]===true;})),_step;!(_iteratorNormalCompletion=(_step=_iterator.next()).done);_iteratorNormalCompletion=true){var key=_step.value;opts[key]=false;$=cheerio$$1.load(html);node=this.getContentNode($,title,url,opts);if(nodeIsSufficient$1(node)){break;}}}catch(err){_didIteratorError=true;_iteratorError=err;}finally{try{if(!_iteratorNormalCompletion&&_iterator.return){_iterator.return();}}finally{if(_didIteratorError){throw _iteratorError;}}}return this.cleanAndReturnNode(node,$);},// Get node given current options
var _iteratorNormalCompletion=true;var _didIteratorError=false;var _iteratorError=undefined;try{for(var _iterator=_getIterator$1(_Reflect$ownKeys$1(opts).filter(function(k){return opts[k]===true;})),_step;!(_iteratorNormalCompletion=(_step=_iterator.next()).done);_iteratorNormalCompletion=true){var key=_step.value;opts[key]=false;$=cheerio$1.load(html);node=this.getContentNode($,title,url,opts);if(nodeIsSufficient$1(node)){break;}}}catch(err){_didIteratorError=true;_iteratorError=err;}finally{try{if(!_iteratorNormalCompletion&&_iterator.return){_iterator.return();}}finally{if(_didIteratorError){throw _iteratorError;}}}return this.cleanAndReturnNode(node,$);},// Get node given current options
getContentNode:function getContentNode($,title,url,opts){return extractCleanNode(extractBestNode($,opts),{$:$,cleanConditionally:opts.cleanConditionally,title:title,url:url});},// Once we got here, either we're at our last-resort node, or
// we broke early. Make sure we at least have -something- before we
// move forward.
@ -1854,7 +1915,7 @@ function scoreAttr($img){if($img.attr('alt')){return 5;}return 0;}// Look throug
// container elements, give a bonus if we find them
/**
 * Score an image by its ancestors: +25 when its first `figure` ancestor
 * lookup yields exactly one element, +15 for each of parent and grandparent
 * whose signature (getSig) matches PHOTO_HINTS_RE$1$1.
 *
 * @param {Object} $img - Wrapped image element exposing parents()/parent().
 * @returns {number} The accumulated bonus.
 */
function scoreByParents($img) {
  var total = $img.parents('figure').first().length === 1 ? 25 : 0;

  var $direct = $img.parent();
  // The grandparent is only looked up when there is exactly one parent;
  // otherwise undefined is still handed to getSig below.
  var $grand = $direct.length === 1 ? $direct.parent() : undefined;

  var ancestors = [$direct, $grand];
  for (var i = 0; i < ancestors.length; i += 1) {
    if (PHOTO_HINTS_RE$1$1.test(getSig(ancestors[i]))) {
      total += 15;
    }
  }

  return total;
}
// Look at our immediate sibling and see if it looks like it's a
// caption. Bonus if so.
/**
 * Score an image by the element that follows it: +25 for a `figcaption`
 * (compared case-insensitively), +15 when the sibling's signature matches
 * PHOTO_HINTS_RE$1$1.
 *
 * @param {Object} $img - Wrapped image element exposing next().
 * @returns {number} The accumulated bonus.
 */
function scoreBySibling($img) {
  var score = 0;
  var $sibling = $img.next();
  var sibling = $sibling.get(0);

  if (sibling && sibling.tagName.toLowerCase() === 'figcaption') {
    score += 25;
  }

  if (PHOTO_HINTS_RE$1$1.test(getSig($sibling))) {
    score += 15;
  }

  return score;
}

/**
 * Score an image by its declared width/height attributes.
 *
 * @param {Object} $img - Wrapped image element exposing attr().
 * @returns {number} Negative for tiny images, positive in proportion to area
 *   for larger ones, 0 when no usable dimensions are declared.
 */
function scoreByDimensions($img) {
  var score = 0;
  var width = parseFloat($img.attr('width'));
  var height = parseFloat($img.attr('height'));
  // An image may have no src attribute at all; default to '' so the
  // sprite check below cannot throw on undefined.
  var src = $img.attr('src') || '';

  // Penalty for skinny images
  if (width && width <= 50) {
    score -= 50;
  }

  // Penalty for short images
  if (height && height <= 50) {
    score -= 50;
  }

  // Area scoring is skipped for anything whose src mentions 'sprite'.
  if (width && height && !src.includes('sprite')) {
    var area = width * height;
    if (area < 5000) {
      // Smaller than 50 x 100
      score -= 100;
    } else {
      score += Math.round(area / 1000);
    }
  }

  return score;
}

/**
 * Score an image by its index in the list: earlier images score higher,
 * images past the midpoint go negative.
 *
 * @param {Object} $imgs - Collection exposing `length`.
 * @param {number} index - Position of the image within the collection.
 * @returns {number} `$imgs.length / 2 - index`.
 */
function scoreByPosition($imgs, index) {
  return $imgs.length / 2 - index;
}
// Given a resource, try to find the lead image URL from within
@ -2102,8 +2163,9 @@ if(NEXT_LINK_TEXT_RE$1.test(linkData)){return-65;}}return 0;}function makeBaseRe
var scoredPages=links.reduce(function(possiblePages,link){// Remove any anchor data since we don't do a good job
// standardizing URLs (it's hard), we're going to do
// some checking with and without a trailing slash
var attrs=getAttrs$1(link);var href=removeAnchor$1(attrs.href);var $link=$(link);var linkText=$link.text();if(!shouldScore(href,articleUrl,baseUrl,parsedUrl,linkText,previousUrls)){return possiblePages;}// ## PASSED THE FIRST-PASS TESTS. Start scoring. ##
if(!possiblePages[href]){possiblePages[href]={score:0,linkText:linkText,href:href};}else{possiblePages[href].linkText=possiblePages[href].linkText+'|'+linkText;}var possiblePage=possiblePages[href];var linkData=makeSig($link,linkText);var pageNum=pageNumFromUrl$1(href);var score=scoreBaseUrl(href,baseRegex);score+=scoreNextLinkText(linkData);score+=scoreCapLinks(linkData);score+=scorePrevLink(linkData);score+=scoreByParents$1($link);score+=scoreExtraneousLinks(href);score+=scorePageInLink(pageNum,isWp);score+=scoreLinkText(linkText,pageNum);score+=scoreSimilarity(score,articleUrl,href);possiblePage.score=score;return possiblePages;},{});return _Reflect$ownKeys$1(scoredPages).length===0?null:scoredPages;}/* eslint-disable */// Looks for and returns next page url
var attrs=getAttrs$1(link);// if href is undefined, return
if(!attrs.href)return possiblePages;var href=removeAnchor$1(attrs.href);var $link=$(link);var linkText=$link.text();if(!shouldScore(href,articleUrl,baseUrl,parsedUrl,linkText,previousUrls)){return possiblePages;}// ## PASSED THE FIRST-PASS TESTS. Start scoring. ##
if(!possiblePages[href]){possiblePages[href]={score:0,linkText:linkText,href:href};}else{possiblePages[href].linkText=possiblePages[href].linkText+'|'+linkText;}var possiblePage=possiblePages[href];var linkData=makeSig($link,linkText);var pageNum=pageNumFromUrl$1(href);var score=scoreBaseUrl(href,baseRegex);score+=scoreNextLinkText(linkData);score+=scoreCapLinks(linkData);score+=scorePrevLink(linkData);score+=scoreByParents$1($link);score+=scoreExtraneousLinks(href);score+=scorePageInLink(pageNum,isWp);score+=scoreLinkText(linkText,pageNum);score+=scoreSimilarity(score,articleUrl,href);possiblePage.score=score;return possiblePages;},{});return _Reflect$ownKeys$1(scoredPages).length===0?null:scoredPages;}// Looks for and returns next page url
// for multi-page articles
var GenericNextPageUrlExtractor={extract:function extract(_ref){var $=_ref.$,url=_ref.url,parsedUrl=_ref.parsedUrl,_ref$previousUrls=_ref.previousUrls,previousUrls=_ref$previousUrls===undefined?[]:_ref$previousUrls;parsedUrl=parsedUrl||URL$1.parse(url);var articleUrl=removeAnchor$1(url);var baseUrl=articleBaseUrl$1(url,parsedUrl);var links=$('a[href]').toArray();var scoredLinks=scoreLinks({links:links,articleUrl:articleUrl,baseUrl:baseUrl,parsedUrl:parsedUrl,$:$,previousUrls:previousUrls});// If no links were scored, return null
if(!scoredLinks)return null;// now that we've scored all possible pages,
@ -2112,37 +2174,48 @@ var topPage=_Reflect$ownKeys$1(scoredLinks).reduce(function(acc,link){var scored
// so we fail.
if(topPage.score>=50){return topPage.href;}return null;}};var CANONICAL_META_SELECTORS=['og:url'];function parseDomain(url){var parsedUrl=URL$1.parse(url);var hostname=parsedUrl.hostname;return hostname;}function result(url){return{url:url,domain:parseDomain(url)};}var GenericUrlExtractor={extract:function extract(_ref){var $=_ref.$,url=_ref.url,metaCache=_ref.metaCache;var $canonical=$('link[rel=canonical]');if($canonical.length!==0){var href=$canonical.attr('href');if(href){return result(href);}}var metaUrl=extractFromMeta$$1($,CANONICAL_META_SELECTORS,metaCache);if(metaUrl){return result(metaUrl);}return result(url);}};var EXCERPT_META_SELECTORS=['og:description','twitter:description'];function clean$2(content,$){var maxLength=arguments.length>2&&arguments[2]!==undefined?arguments[2]:200;content=content.replace(/[\s\n]+/g,' ').trim();return ellipsize$1(content,maxLength,{ellipse:'&hellip;'});}var GenericExcerptExtractor={extract:function extract(_ref){var $=_ref.$,content=_ref.content,metaCache=_ref.metaCache;var excerpt=extractFromMeta$$1($,EXCERPT_META_SELECTORS,metaCache);if(excerpt){return clean$2(stripTags$1(excerpt,$));}// Fall back to excerpting from the extracted content
var maxLength=200;var shortContent=content.slice(0,maxLength*5);return clean$2($(shortContent).text(),$,maxLength);}};var GenericWordCountExtractor={extract:function extract(_ref){var content=_ref.content;var $=cheerio$1.load(content);var $content=$('div').first();var text=normalizeSpaces$1($content.text());return text.split(/\s/).length;}};var GenericExtractor={// This extractor is the default for all domains
domain:'*',title:GenericTitleExtractor.extract,date_published:GenericDatePublishedExtractor.extract,author:GenericAuthorExtractor.extract,content:GenericContentExtractor.extract.bind(GenericContentExtractor),lead_image_url:GenericLeadImageUrlExtractor.extract,dek:GenericDekExtractor.extract,next_page_url:GenericNextPageUrlExtractor.extract,url_and_domain:GenericUrlExtractor.extract,excerpt:GenericExcerptExtractor.extract,word_count:GenericWordCountExtractor.extract,direction:function direction(_ref){var title=_ref.title;return stringDirection$1.getDirection(title);},extract:function extract(options){var html=options.html,cheerio$$1=options.cheerio,$=options.$;if(html&&!$){var loaded=cheerio$$1.load(html);options.$=loaded;}var title=this.title(options);var date_published=this.date_published(options);var author=this.author(options);var content=this.content(_extends$1({},options,{title:title}));var lead_image_url=this.lead_image_url(_extends$1({},options,{content:content}));var dek=this.dek(_extends$1({},options,{content:content}));var next_page_url=this.next_page_url(options);var excerpt=this.excerpt(_extends$1({},options,{content:content}));var word_count=this.word_count(_extends$1({},options,{content:content}));var direction=this.direction({title:title});var _url_and_domain=this.url_and_domain(options),url=_url_and_domain.url,domain=_url_and_domain.domain;return{title:title,author:author,date_published:date_published||null,dek:dek,lead_image_url:lead_image_url,content:content,next_page_url:next_page_url,url:url,domain:domain,excerpt:excerpt,word_count:word_count,direction:direction};}};function getExtractor(url,parsedUrl){parsedUrl=parsedUrl||URL$1.parse(url);var _parsedUrl=parsedUrl,hostname=_parsedUrl.hostname;var baseDomain=hostname.split('.').slice(-2).join('.');return Extractors[hostname]||Extractors[baseDomain]||GenericExtractor;}/* eslint-disable */// Remove elements by an array of selectors
domain:'*',title:GenericTitleExtractor.extract,date_published:GenericDatePublishedExtractor.extract,author:GenericAuthorExtractor.extract,content:GenericContentExtractor.extract.bind(GenericContentExtractor),lead_image_url:GenericLeadImageUrlExtractor.extract,dek:GenericDekExtractor.extract,next_page_url:GenericNextPageUrlExtractor.extract,url_and_domain:GenericUrlExtractor.extract,excerpt:GenericExcerptExtractor.extract,word_count:GenericWordCountExtractor.extract,direction:function direction(_ref){var title=_ref.title;return stringDirection$1.getDirection(title);},extract:function extract(options){var html=options.html,$=options.$;if(html&&!$){var loaded=cheerio$1.load(html);options.$=loaded;}var title=this.title(options);var date_published=this.date_published(options);var author=this.author(options);var content=this.content(_extends$1({},options,{title:title}));var lead_image_url=this.lead_image_url(_extends$1({},options,{content:content}));var dek=this.dek(_extends$1({},options,{content:content}));var next_page_url=this.next_page_url(options);var excerpt=this.excerpt(_extends$1({},options,{content:content}));var word_count=this.word_count(_extends$1({},options,{content:content}));var direction=this.direction({title:title});var _url_and_domain=this.url_and_domain(options),url=_url_and_domain.url,domain=_url_and_domain.domain;return{title:title,author:author,date_published:date_published||null,dek:dek,lead_image_url:lead_image_url,content:content,next_page_url:next_page_url,url:url,domain:domain,excerpt:excerpt,word_count:word_count,direction:direction};}};function getExtractor(url,parsedUrl){parsedUrl=parsedUrl||URL$1.parse(url);var _parsedUrl=parsedUrl,hostname=_parsedUrl.hostname;var baseDomain=hostname.split('.').slice(-2).join('.');return Extractors[hostname]||Extractors[baseDomain]||GenericExtractor;}// Remove elements by an array of selectors
/**
 * Remove every element under $content that matches one of the extractor's
 * `clean` selectors.
 *
 * @param {Object} $content - Root to clean; returned as-is.
 * @param {Function} $ - Selector function, called as $(selector, context).
 * @param {Object} _ref - Extraction options.
 * @param {string[]} [_ref.clean] - Selectors to remove; nothing happens when
 *   this is falsy.
 * @returns {Object} The same $content that was passed in.
 */
function cleanBySelectors($content, $, _ref) {
  var selectors = _ref.clean;

  if (selectors) {
    // One combined query instead of one lookup per selector.
    var combined = selectors.join(',');
    $(combined, $content).remove();
  }

  return $content;
}
// Transform matching elements
function transformElements($content,$,_ref2){var transforms=_ref2.transforms;if(!transforms)return $content;_Reflect$ownKeys$1(transforms).forEach(function(key){var $matches=$(key,$content);var value=transforms[key];// If value is a string, convert directly
if(typeof value==='string'){$matches.each(function(index,node){convertNodeTo$$1($(node),$,transforms[key]);});}else if(typeof value==='function'){// If value is function, apply function to node
$matches.each(function(index,node){var result=value($(node),$);// If function returns a string, convert node to that value
if(typeof result==='string'){convertNodeTo$$1($(node),$,result);}});}});return $content;}function findMatchingSelector($,selectors){return selectors.find(function(selector){if(Array.isArray(selector)){var _selector=_slicedToArray$1(selector,2),s=_selector[0],attr=_selector[1];return $(s).length===1&&$(s).attr(attr)&&$(s).attr(attr).trim()!=='';}// debugger
return $(selector).length===1&&$(selector).text().trim()!=='';});}function select(opts){var $=opts.$,type=opts.type,extractionOpts=opts.extractionOpts,_opts$extractHtml=opts.extractHtml,extractHtml=_opts$extractHtml===undefined?false:_opts$extractHtml;// Skip if there's not extraction for this type
if(typeof result==='string'){convertNodeTo$$1($(node),$,result);}});}});return $content;}function findMatchingSelector($,selectors,extractHtml){return selectors.find(function(selector){if(Array.isArray(selector)){if(extractHtml){return selector.reduce(function(acc,s){return acc&&$(s).length>0;},true);}var _selector=_slicedToArray$1(selector,2),s=_selector[0],attr=_selector[1];return $(s).length===1&&$(s).attr(attr)&&$(s).attr(attr).trim()!=='';}return $(selector).length===1&&$(selector).text().trim()!=='';});}function select(opts){var $=opts.$,type=opts.type,extractionOpts=opts.extractionOpts,_opts$extractHtml=opts.extractHtml,extractHtml=_opts$extractHtml===undefined?false:_opts$extractHtml;// Skip if there's not extraction for this type
if(!extractionOpts)return null;// If a string is hardcoded for a type (e.g., Wikipedia
// contributors), return the string
if(typeof extractionOpts==='string')return extractionOpts;var selectors=extractionOpts.selectors,_extractionOpts$defau=extractionOpts.defaultCleaner,defaultCleaner=_extractionOpts$defau===undefined?true:_extractionOpts$defau;var matchingSelector=findMatchingSelector($,selectors);if(!matchingSelector)return null;// Declaring result; will contain either
if(typeof extractionOpts==='string')return extractionOpts;var selectors=extractionOpts.selectors,_extractionOpts$defau=extractionOpts.defaultCleaner,defaultCleaner=_extractionOpts$defau===undefined?true:_extractionOpts$defau;var matchingSelector=findMatchingSelector($,selectors,extractHtml);if(!matchingSelector)return null;// Declaring result; will contain either
// text or html, which will be cleaned
// by the appropriate cleaner type
// If the selector type requests html as its return type
// transform and clean the element with provided selectors
if(extractHtml){var $content=$(matchingSelector);// Wrap in div so transformation can take place on root element
var $content=void 0;if(extractHtml){// If matching selector is an array, we're considering this a
// multi-match selection, which allows the parser to choose several
// selectors to include in the result. Note that all selectors in the
// array must match in order for this selector to trigger
if(Array.isArray(matchingSelector)){(function(){$content=$(matchingSelector.join(','));var $wrapper=$('<div></div>');$content.each(function(index,element){$wrapper.append(element);});$content=$wrapper;})();}else{$content=$(matchingSelector);}// Wrap in div so transformation can take place on root element
$content.wrap($('<div></div>'));$content=$content.parent();$content=transformElements($content,$,extractionOpts);$content=cleanBySelectors($content,$,extractionOpts);$content=Cleaners[type]($content,_extends$1({},opts,{defaultCleaner:defaultCleaner}));return $.html($content);}var result=void 0;// if selector is an array (e.g., ['img', 'src']),
// extract the attr
if(Array.isArray(matchingSelector)){var _matchingSelector=_slicedToArray$1(matchingSelector,2),selector=_matchingSelector[0],attr=_matchingSelector[1];result=$(selector).attr(attr).trim();}else{result=$(matchingSelector).text().trim();}// Allow custom extractor to skip default cleaner
// for this type; defaults to true
if(defaultCleaner){return Cleaners[type](result,opts);}return result;}function extractResult(opts){var type=opts.type,extractor=opts.extractor,_opts$fallback=opts.fallback,fallback=_opts$fallback===undefined?true:_opts$fallback;var result=select(_extends$1({},opts,{extractionOpts:extractor[type]}));// If custom parser succeeds, return the result
if(defaultCleaner){return Cleaners[type](result,_extends$1({},opts,extractionOpts));}return result;}function extractResult(opts){var type=opts.type,extractor=opts.extractor,_opts$fallback=opts.fallback,fallback=_opts$fallback===undefined?true:_opts$fallback;var result=select(_extends$1({},opts,{extractionOpts:extractor[type]}));// If custom parser succeeds, return the result
if(result){return result;}// If nothing matches the selector, and fallback is enabled,
// run the Generic extraction
if(fallback)return GenericExtractor[type](opts);return null;}var RootExtractor={extract:function extract(){var extractor=arguments.length>0&&arguments[0]!==undefined?arguments[0]:GenericExtractor;var opts=arguments[1];var _opts=opts,contentOnly=_opts.contentOnly,extractedTitle=_opts.extractedTitle;// This is the generic extractor. Run its extract method
if(extractor.domain==='*')return extractor.extract(opts);opts=_extends$1({},opts,{extractor:extractor});if(contentOnly){var _content=extractResult(_extends$1({},opts,{type:'content',extractHtml:true,title:extractedTitle}));return{content:_content};}var title=extractResult(_extends$1({},opts,{type:'title'}));var date_published=extractResult(_extends$1({},opts,{type:'date_published'}));var author=extractResult(_extends$1({},opts,{type:'author'}));var next_page_url=extractResult(_extends$1({},opts,{type:'next_page_url'}));var content=extractResult(_extends$1({},opts,{type:'content',extractHtml:true,title:title}));var lead_image_url=extractResult(_extends$1({},opts,{type:'lead_image_url',content:content}));var excerpt=extractResult(_extends$1({},opts,{type:'excerpt',content:content}));var dek=extractResult(_extends$1({},opts,{type:'dek',content:content,excerpt:excerpt}));var word_count=extractResult(_extends$1({},opts,{type:'word_count',content:content}));var direction=extractResult(_extends$1({},opts,{type:'direction',title:title}));var _ref3=extractResult(_extends$1({},opts,{type:'url_and_domain'}))||{url:null,domain:null},url=_ref3.url,domain=_ref3.domain;return{title:title,content:content,author:author,date_published:date_published,lead_image_url:lead_image_url,dek:dek,next_page_url:next_page_url,url:url,domain:domain,excerpt:excerpt,word_count:word_count,direction:direction};}};var collectAllPages=function(){var _ref=_asyncToGenerator(_regeneratorRuntime.mark(function _callee(_ref2){var next_page_url=_ref2.next_page_url,html=_ref2.html,$=_ref2.$,metaCache=_ref2.metaCache,result=_ref2.result,Extractor=_ref2.Extractor,title=_ref2.title,url=_ref2.url,cheerio$$1=_ref2.cheerio;var pages,previousUrls,extractorOpts,nextPageResult,word_count;return _regeneratorRuntime.wrap(function _callee$(_context){while(1){switch(_context.prev=_context.next){case 0:// At this point, we've fetched just the first page
if(extractor.domain==='*')return extractor.extract(opts);opts=_extends$1({},opts,{extractor:extractor});if(contentOnly){var _content=extractResult(_extends$1({},opts,{type:'content',extractHtml:true,title:extractedTitle}));return{content:_content};}var title=extractResult(_extends$1({},opts,{type:'title'}));var date_published=extractResult(_extends$1({},opts,{type:'date_published'}));var author=extractResult(_extends$1({},opts,{type:'author'}));var next_page_url=extractResult(_extends$1({},opts,{type:'next_page_url'}));var content=extractResult(_extends$1({},opts,{type:'content',extractHtml:true,title:title}));var lead_image_url=extractResult(_extends$1({},opts,{type:'lead_image_url',content:content}));var excerpt=extractResult(_extends$1({},opts,{type:'excerpt',content:content}));var dek=extractResult(_extends$1({},opts,{type:'dek',content:content,excerpt:excerpt}));var word_count=extractResult(_extends$1({},opts,{type:'word_count',content:content}));var direction=extractResult(_extends$1({},opts,{type:'direction',title:title}));var _ref3=extractResult(_extends$1({},opts,{type:'url_and_domain'}))||{url:null,domain:null},url=_ref3.url,domain=_ref3.domain;return{title:title,content:content,author:author,date_published:date_published,lead_image_url:lead_image_url,dek:dek,next_page_url:next_page_url,url:url,domain:domain,excerpt:excerpt,word_count:word_count,direction:direction};}};var collectAllPages=function(){var _ref=_asyncToGenerator(_regeneratorRuntime.mark(function _callee(_ref2){var next_page_url=_ref2.next_page_url,html=_ref2.html,$=_ref2.$,metaCache=_ref2.metaCache,result=_ref2.result,Extractor=_ref2.Extractor,title=_ref2.title,url=_ref2.url;var pages,previousUrls,extractorOpts,nextPageResult,word_count;return _regeneratorRuntime.wrap(function _callee$(_context){while(1){switch(_context.prev=_context.next){case 0:// At this point, we've fetched just the first page
pages=1;previousUrls=[removeAnchor$1(url)];// If we've gone over 26 pages, something has
// likely gone wrong.
case 2:if(!(next_page_url&&pages<26)){_context.next=15;break;}pages+=1;_context.next=6;return Resource.create(next_page_url);case 6:$=_context.sent;html=$.html();extractorOpts={url:next_page_url,html:html,$:$,metaCache:metaCache,contentOnly:true,extractedTitle:title,previousUrls:previousUrls,cheerio:cheerio$$1};nextPageResult=RootExtractor.extract(Extractor,extractorOpts);previousUrls.push(next_page_url);result=_extends$1({},result,{content:'\n '+result.content+'\n <hr>\n <h4>Page '+pages+'</h4>\n '+nextPageResult.content+'\n '});next_page_url=nextPageResult.next_page_url;_context.next=2;break;case 15:word_count=GenericExtractor.word_count({content:'<div>'+result.content+'</div>'});return _context.abrupt('return',_extends$1({},result,{total_pages:pages,pages_rendered:pages,word_count:word_count}));case 17:case'end':return _context.stop();}}},_callee,this);}));function collectAllPages(_x){return _ref.apply(this,arguments);}return collectAllPages;}();var Mercury={parse:function parse(url,html){var _this=this;var opts=arguments.length>2&&arguments[2]!==undefined?arguments[2]:{};return _asyncToGenerator(_regeneratorRuntime.mark(function _callee(){var _opts$fetchAllPages,fetchAllPages,_opts$fallback,fallback,parsedUrl,Extractor,$,metaCache,result,_result,title,next_page_url;return _regeneratorRuntime.wrap(function _callee$(_context){while(1){switch(_context.prev=_context.next){case 0:_opts$fetchAllPages=opts.fetchAllPages,fetchAllPages=_opts$fetchAllPages===undefined?true:_opts$fetchAllPages,_opts$fallback=opts.fallback,fallback=_opts$fallback===undefined?true:_opts$fallback;parsedUrl=URL$1.parse(url);if(validateUrl(parsedUrl)){_context.next=4;break;}return _context.abrupt('return',Errors.badUrl);case 4:Extractor=getExtractor(url,parsedUrl);// console.log(`Using extractor for ${Extractor.domain}`);
_context.next=7;return Resource.create(url,html,parsedUrl);case 7:$=_context.sent;if(!$.error){_context.next=10;break;}return _context.abrupt('return',$);case 10:html=$.html();// Cached value of every meta name in our document.
case 2:if(!(next_page_url&&pages<26)){_context.next=15;break;}pages+=1;_context.next=6;return Resource.create(next_page_url);case 6:$=_context.sent;html=$.html();extractorOpts={url:next_page_url,html:html,$:$,metaCache:metaCache,contentOnly:true,extractedTitle:title,previousUrls:previousUrls};nextPageResult=RootExtractor.extract(Extractor,extractorOpts);previousUrls.push(next_page_url);result=_extends$1({},result,{content:result.content+'<hr><h4>Page '+pages+'</h4>'+nextPageResult.content});next_page_url=nextPageResult.next_page_url;_context.next=2;break;case 15:word_count=GenericExtractor.word_count({content:'<div>'+result.content+'</div>'});return _context.abrupt('return',_extends$1({},result,{total_pages:pages,pages_rendered:pages,word_count:word_count}));case 17:case'end':return _context.stop();}}},_callee,this);}));function collectAllPages(_x){return _ref.apply(this,arguments);}return collectAllPages;}();var Mercury={parse:function parse(url,html){var _this=this;var opts=arguments.length>2&&arguments[2]!==undefined?arguments[2]:{};return _asyncToGenerator(_regeneratorRuntime.mark(function _callee(){var _opts$fetchAllPages,fetchAllPages,_opts$fallback,fallback,parsedUrl,Extractor,$,metaCache,result,_result,title,next_page_url;return _regeneratorRuntime.wrap(function _callee$(_context){while(1){switch(_context.prev=_context.next){case 0:_opts$fetchAllPages=opts.fetchAllPages,fetchAllPages=_opts$fetchAllPages===undefined?true:_opts$fetchAllPages,_opts$fallback=opts.fallback,fallback=_opts$fallback===undefined?true:_opts$fallback;// if no url was passed and this is the browser version,
// set url to window.location.href and load the html
// from the current page
if(!url&&cheerio$1.browser){url=window.location.href;// eslint-disable-line no-undef
html=html||cheerio$1.html();}parsedUrl=URL$1.parse(url);if(validateUrl(parsedUrl)){_context.next=5;break;}return _context.abrupt('return',Errors.badUrl);case 5:Extractor=getExtractor(url,parsedUrl);// console.log(`Using extractor for ${Extractor.domain}`);
_context.next=8;return Resource.create(url,html,parsedUrl);case 8:$=_context.sent;if(!$.failed){_context.next=11;break;}return _context.abrupt('return',$);case 11:// if html still has not been set (i.e., url passed to Mercury.parse),
// set html from the response of Resource.create
if(!html){html=$.html();}// Cached value of every meta name in our document.
// Used when extracting title/author/date_published/dek
metaCache=$('meta').map(function(_,node){return $(node).attr('name');}).toArray();result=RootExtractor.extract(Extractor,{url:url,html:html,$:$,metaCache:metaCache,parsedUrl:parsedUrl,fallback:fallback,cheerio:cheerio$1});_result=result,title=_result.title,next_page_url=_result.next_page_url;// Fetch more pages if next_page_url found
if(!(fetchAllPages&&next_page_url)){_context.next=20;break;}_context.next=17;return collectAllPages({Extractor:Extractor,next_page_url:next_page_url,html:html,$:$,metaCache:metaCache,result:result,title:title,url:url,cheerio:cheerio$1});case 17:result=_context.sent;_context.next=21;break;case 20:result=_extends$1({},result,{total_pages:1,rendered_pages:1});case 21:return _context.abrupt('return',result);case 22:case'end':return _context.stop();}}},_callee,_this);}))();},// A convenience method for getting a resource
metaCache=$('meta').map(function(_,node){return $(node).attr('name');}).toArray();result=RootExtractor.extract(Extractor,{url:url,html:html,$:$,metaCache:metaCache,parsedUrl:parsedUrl,fallback:fallback});_result=result,title=_result.title,next_page_url=_result.next_page_url;// Fetch more pages if next_page_url found
if(!(fetchAllPages&&next_page_url)){_context.next=21;break;}_context.next=18;return collectAllPages({Extractor:Extractor,next_page_url:next_page_url,html:html,$:$,metaCache:metaCache,result:result,title:title,url:url});case 18:result=_context.sent;_context.next=22;break;case 21:result=_extends$1({},result,{total_pages:1,rendered_pages:1});case 22:// if this parse is happening in the browser,
// clean up any trace from the page.
if(cheerio$1.browser){cheerio$1.cleanup();}return _context.abrupt('return',result);case 24:case'end':return _context.stop();}}},_callee,_this);}))();},browser:!!cheerio$1.browser,// A convenience method for getting a resource
// to work with, e.g., for custom extractor generator
fetchResource:function fetchResource(url){var _this2=this;return _asyncToGenerator(_regeneratorRuntime.mark(function _callee2(){return _regeneratorRuntime.wrap(function _callee2$(_context2){while(1){switch(_context2.prev=_context2.next){case 0:_context2.next=2;return Resource.create(url);case 2:return _context2.abrupt('return',_context2.sent);case 3:case'end':return _context2.stop();}}},_callee2,_this2);}))();}};var mercury=Mercury;
@ -2206,23 +2279,23 @@ var extractorTemplate = function (hostname, name) {
return template(_templateObject, name, hostname);
};
var _templateObject$1 = _taggedTemplateLiteral(['\n it(\'returns the ', '\', async () => {\n // To pass this test, fill out the ', ' selector\n // in ', '/index.js.\n const html =\n fs.readFileSync(\'', '\');\n const articleUrl =\n \'', '\';\n\n const { ', ' } =\n await Mercury.parse(articleUrl, html, { fallback: false });\n\n // Update these values with the expected values from\n // the article.\n assert.equal(', ', ', ')\n });\n '], ['\n it(\'returns the ', '\', async () => {\n // To pass this test, fill out the ', ' selector\n // in ', '/index.js.\n const html =\n fs.readFileSync(\'', '\');\n const articleUrl =\n \'', '\';\n\n const { ', ' } =\n await Mercury.parse(articleUrl, html, { fallback: false });\n\n // Update these values with the expected values from\n // the article.\n assert.equal(', ', ', ')\n });\n ']);
var _templateObject2 = _taggedTemplateLiteral(['\n import assert from \'assert\';\n import fs from \'fs\';\n import URL from \'url\';\n import cheerio from \'cheerio\';\n\n import Mercury from \'mercury\';\n import getExtractor from \'extractors/get-extractor\';\n import { excerptContent } from \'utils/text\';\n\n describe(\'', '\', () => {\n it(\'is selected properly\', () => {\n // This test should be passing by default.\n // It sanity checks that the correct parser\n // is being selected for URLs from this domain\n const url =\n \'', '\';\n const extractor = getExtractor(url);\n assert.equal(extractor.domain, URL.parse(url).hostname)\n })\n\n ', '\n\n it(\'returns the content\', async () => {\n // To pass this test, fill out the content selector\n // in ', '/index.js.\n // You may also want to make use of the clean and transform\n // options.\n const html =\n fs.readFileSync(\'', '\');\n const url =\n \'', '\';\n\n const { content } =\n await Mercury.parse(url, html, { fallback: false });\n\n const $ = cheerio.load(content || \'\');\n\n const first13 = excerptContent($(\'*\').first().text(), 13)\n\n // Update these values with the expected values from\n // the article.\n assert.equal(first13, \'Add the first 13 words of the article here\');\n });\n });\n '], ['\n import assert from \'assert\';\n import fs from \'fs\';\n import URL from \'url\';\n import cheerio from \'cheerio\';\n\n import Mercury from \'mercury\';\n import getExtractor from \'extractors/get-extractor\';\n import { excerptContent } from \'utils/text\';\n\n describe(\'', '\', () => {\n it(\'is selected properly\', () => {\n // This test should be passing by default.\n // It sanity checks that the correct parser\n // is being selected for URLs from this domain\n const url =\n \'', '\';\n const extractor = getExtractor(url);\n assert.equal(extractor.domain, URL.parse(url).hostname)\n })\n\n ', '\n\n it(\'returns the content\', async () => {\n // To pass this test, fill out the content selector\n // 
in ', '/index.js.\n // You may also want to make use of the clean and transform\n // options.\n const html =\n fs.readFileSync(\'', '\');\n const url =\n \'', '\';\n\n const { content } =\n await Mercury.parse(url, html, { fallback: false });\n\n const $ = cheerio.load(content || \'\');\n\n const first13 = excerptContent($(\'*\').first().text(), 13)\n\n // Update these values with the expected values from\n // the article.\n assert.equal(first13, \'Add the first 13 words of the article here\');\n });\n });\n ']);
var _templateObject$1 = _taggedTemplateLiteral(['\n it(\'returns the ', '\', async () => {\n // To pass this test, fill out the ', ' selector\n // in ', '/index.js.\n const { ', ' } = await result\n\n // Update these values with the expected values from\n // the article.\n assert.equal(', ', ', ')\n });\n '], ['\n it(\'returns the ', '\', async () => {\n // To pass this test, fill out the ', ' selector\n // in ', '/index.js.\n const { ', ' } = await result\n\n // Update these values with the expected values from\n // the article.\n assert.equal(', ', ', ')\n });\n ']);
var _templateObject2 = _taggedTemplateLiteral(['\n import assert from \'assert\';\n import fs from \'fs\';\n import URL from \'url\';\n import cheerio from \'cheerio\';\n\n import Mercury from \'mercury\';\n import getExtractor from \'extractors/get-extractor\';\n import { excerptContent } from \'utils/text\';\n\n describe(\'', '\', () => {\n describe(\'initial test case\', () => {\n let result;\n let url;\n beforeAll(() => {\n url =\n \'', '\';\n const html =\n fs.readFileSync(\'', '\');\n result =\n Mercury.parse(url, html, { fallback: false });\n });\n\n it(\'is selected properly\', () => {\n // This test should be passing by default.\n // It sanity checks that the correct parser\n // is being selected for URLs from this domain\n const extractor = getExtractor(url);\n assert.equal(extractor.domain, URL.parse(url).hostname)\n })\n\n ', '\n\n it(\'returns the content\', async () => {\n // To pass this test, fill out the content selector\n // in ', '/index.js.\n // You may also want to make use of the clean and transform\n // options.\n const { content } = await result;\n\n const $ = cheerio.load(content || \'\');\n\n const first13 = excerptContent($(\'*\').first().text(), 13)\n\n // Update these values with the expected values from\n // the article.\n assert.equal(first13, \'Add the first 13 words of the article here\');\n });\n });\n });\n '], ['\n import assert from \'assert\';\n import fs from \'fs\';\n import URL from \'url\';\n import cheerio from \'cheerio\';\n\n import Mercury from \'mercury\';\n import getExtractor from \'extractors/get-extractor\';\n import { excerptContent } from \'utils/text\';\n\n describe(\'', '\', () => {\n describe(\'initial test case\', () => {\n let result;\n let url;\n beforeAll(() => {\n url =\n \'', '\';\n const html =\n fs.readFileSync(\'', '\');\n result =\n Mercury.parse(url, html, { fallback: false });\n });\n\n it(\'is selected properly\', () => {\n // This test should be passing by default.\n // It sanity checks that the 
correct parser\n // is being selected for URLs from this domain\n const extractor = getExtractor(url);\n assert.equal(extractor.domain, URL.parse(url).hostname)\n })\n\n ', '\n\n it(\'returns the content\', async () => {\n // To pass this test, fill out the content selector\n // in ', '/index.js.\n // You may also want to make use of the clean and transform\n // options.\n const { content } = await result;\n\n const $ = cheerio.load(content || \'\');\n\n const first13 = excerptContent($(\'*\').first().text(), 13)\n\n // Update these values with the expected values from\n // the article.\n assert.equal(first13, \'Add the first 13 words of the article here\');\n });\n });\n });\n ']);
// Result keys for which testFor emits an empty string instead of a
// generated per-field test case.
var IGNORE = [
  'url',
  'domain',
  'content',
  'word_count',
  'next_page_url',
  'excerpt',
  'direction',
  'total_pages',
  'rendered_pages'
];
// Builds the source text of one generated test case for the result field
// named by key, by filling _templateObject$1 through template().
//
// key   - name of the result field the generated test asserts on.
// value - expected value written into the generated assertion.
// dir   - extractor directory mentioned in the generated comment.
// file  - fixture path; only the first return statement below forwards it.
//
// Returns an empty string when key is listed in IGNORE, otherwise the
// rendered test-case source.
//
// NOTE(review): two signature lines and two return statements appear in this
// span, which looks like the pre-change and post-change lines of a diff
// rendered without markers. The five-parameter form forwards file and url to
// the template; the four-parameter form forwards neither. Confirm which one
// the real file contains.
function testFor(key, value, dir, file, url) {
function testFor(key, value, dir, file) {
// Ignored keys produce no test case at all.
if (IGNORE.find(function (k) {
return k === key;
})) return '';
// A truthy value is wrapped in backticks for the generated assertion; a
// falsy value becomes an empty single-quoted string instead.
// NOTE(review): value is not escaped, so a backtick inside value would end
// the generated template literal early.
return template(_templateObject$1, key, key, dir, file, url, key, key, value ? '`' + value + '`' : "''");
return template(_templateObject$1, key, key, dir, key, key, value ? '`' + value + '`' : "''");
}
// Builds the source text of a whole generated extractor test file by filling
// _templateObject2 through template().
//
// file   - fixture path for the saved article HTML.
// url    - article URL written into the generated test.
// dir    - extractor directory mentioned in the generated comments.
// result - parse result; one test case is generated per own key via testFor,
//          and the cases are joined with a blank line between them.
// name   - label passed as the first template value.
//
// NOTE(review): two return statements with different argument orders appear
// here, which looks like the pre-change and post-change lines of a diff
// rendered without markers. As written the second return is unreachable.
// Confirm which one the real file contains.
var extractorTestTemplate = function (file, url, dir, result, name) {
return template(_templateObject2, name, url, _Reflect$ownKeys(result).map(function (k) {
return testFor(k, result[k], dir, file, url);
}).join('\n\n'), dir, file, url);
return template(_templateObject2, name, url, file, _Reflect$ownKeys(result).map(function (k) {
return testFor(k, result[k], dir, file);
}).join('\n\n'), dir);
};
/* eslint-disable import/no-extraneous-dependencies */

File diff suppressed because one or more lines are too long

@ -15,7 +15,7 @@
"test_build": "rollup -c",
"test": "yarn test:node && yarn test:web",
"test:node": "jest ./src",
"test:web": "./node_modules/karma/bin/karma start karma.conf.js",
"test:web": "./node_modules/karma/bin/karma start karma.conf.js --auto-watch",
"test:build": "cd ./scripts && jest check-build.test.js",
"test:build:web": "node ./scripts/proxy-browser-test.js",
"watch:test": "jest --watch",
@ -68,7 +68,7 @@
"ora": "^0.3.0",
"phantomjs-polyfill-find": "ptim/phantomjs-polyfill-find",
"phantomjs-polyfill-string-includes": "^1.0.0",
"phantomjs-prebuilt": "^2.1.7",
"phantomjs-prebuilt": "^2.1.13",
"requirejs": "^2.3.2",
"rollup": "^0.36.3",
"rollup-plugin-babel": "^2.6.1",

@ -12,20 +12,14 @@ const IGNORE = [
'rendered_pages',
];
function testFor(key, value, dir, file, url) {
function testFor(key, value, dir) {
if (IGNORE.find(k => k === key)) return '';
return template`
it('returns the ${key}', async () => {
// To pass this test, fill out the ${key} selector
// in ${dir}/index.js.
const html =
fs.readFileSync('${file}');
const articleUrl =
'${url}';
const { ${key} } =
await Mercury.parse(articleUrl, html, { fallback: false });
const { ${key} } = await result
// Update these values with the expected values from
// the article.
@ -46,38 +40,43 @@ export default function (file, url, dir, result, name) {
import { excerptContent } from 'utils/text';
describe('${name}', () => {
it('is selected properly', () => {
// This test should be passing by default.
// It sanity checks that the correct parser
// is being selected for URLs from this domain
const url =
'${url}';
const extractor = getExtractor(url);
assert.equal(extractor.domain, URL.parse(url).hostname)
})
describe('initial test case', () => {
let result;
let url;
beforeAll(() => {
url =
'${url}';
const html =
fs.readFileSync('${file}');
result =
Mercury.parse(url, html, { fallback: false });
});
${Reflect.ownKeys(result).map(k => testFor(k, result[k], dir, file, url)).join('\n\n')}
it('is selected properly', () => {
// This test should be passing by default.
// It sanity checks that the correct parser
// is being selected for URLs from this domain
const extractor = getExtractor(url);
assert.equal(extractor.domain, URL.parse(url).hostname)
})
it('returns the content', async () => {
// To pass this test, fill out the content selector
// in ${dir}/index.js.
// You may also want to make use of the clean and transform
// options.
const html =
fs.readFileSync('${file}');
const url =
'${url}';
${Reflect.ownKeys(result).map(k => testFor(k, result[k], dir)).join('\n\n')}
const { content } =
await Mercury.parse(url, html, { fallback: false });
it('returns the content', async () => {
// To pass this test, fill out the content selector
// in ${dir}/index.js.
// You may also want to make use of the clean and transform
// options.
const { content } = await result;
const $ = cheerio.load(content || '');
const $ = cheerio.load(content || '');
const first13 = excerptContent($('*').first().text(), 13)
const first13 = excerptContent($('*').first().text(), 13)
// Update these values with the expected values from
// the article.
assert.equal(first13, 'Add the first 13 words of the article here');
// Update these values with the expected values from
// the article.
assert.equal(first13, 'Add the first 13 words of the article here');
});
});
});
`;

@ -8,102 +8,77 @@ import getExtractor from 'extractors/get-extractor';
import { excerptContent } from 'utils/text';
describe('DeadspinExtractor', () => {
it('is selected properly', () => {
// To pass this test, rename your extractor in
// ./src/extractors/custom/deadspin.com/index.js
// (e.g., CustomExtractor => NYTimesExtractor)
// then add your new extractor to
// src/extractors/all.js
const url =
'http://deadspin.com/the-nationals-are-stuck-with-danny-espinosa-tonight-un-1787706769';
const extractor = getExtractor(url);
assert.equal(extractor.domain, URL.parse(url).hostname);
});
it('returns the title', async () => {
// To pass this test, fill out the title selector
// in ./src/extractors/custom/deadspin.com/index.js.
const html =
fs.readFileSync('./fixtures/deadspin.com/1476389931786.html');
const articleUrl =
'http://deadspin.com/the-nationals-are-stuck-with-danny-espinosa-tonight-un-1787706769';
const { title } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(title, 'The Nationals Are Stuck With Danny Espinosa Tonight, Unless They Opt For The Only Thing Worse');
});
it('returns the author', async () => {
// To pass this test, fill out the author selector
// in ./src/extractors/custom/deadspin.com/index.js.
const html =
fs.readFileSync('./fixtures/deadspin.com/1476389931786.html');
const articleUrl =
'http://deadspin.com/the-nationals-are-stuck-with-danny-espinosa-tonight-un-1787706769';
const { author } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(author, 'Chris Thompson');
});
it('returns the date_published', async () => {
// To pass this test, fill out the date_published selector
// in ./src/extractors/custom/deadspin.com/index.js.
const html =
fs.readFileSync('./fixtures/deadspin.com/1476389931786.html');
const articleUrl =
'http://deadspin.com/the-nationals-are-stuck-with-danny-espinosa-tonight-un-1787706769';
const { date_published } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(date_published, '2016-10-13T16:34:00.000Z');
});
it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/deadspin.com/index.js.
const html =
fs.readFileSync('./fixtures/deadspin.com/1476389931786.html');
const articleUrl =
'http://deadspin.com/the-nationals-are-stuck-with-danny-espinosa-tonight-un-1787706769';
const { lead_image_url } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(lead_image_url, 'https://i.kinja-img.com/gawker-media/image/upload/s--SUEXWZgf--/c_fill,fl_progressive,g_center,h_450,q_80,w_800/vmeayd7lteyycwzcdlju.jpg');
});
it('returns the content', async () => {
// To pass this test, fill out the content selector
// in ./src/extractors/custom/deadspin.com/index.js.
// You may also want to make use of the clean and transform
// options.
const html =
fs.readFileSync('./fixtures/deadspin.com/1476389931786.html');
const url =
'http://deadspin.com/the-nationals-are-stuck-with-danny-espinosa-tonight-un-1787706769';
const { content } =
await Mercury.parse(url, html, { fallback: false });
const $ = cheerio.load(content || '');
const first13 = excerptContent($('*').first().text(), 13);
// Update these values with the expected values from
// the article.
assert.equal(first13, 'Photo credit: Rob Carr/Getty Washingtons Danny Espinosa problem is inextricably linked to its');
// Shared-fixture suite: the saved Deadspin article is handed to Mercury.parse
// once, and every field check below awaits that same stored result.
describe('initial test case', () => {
  let url;
  let result;

  beforeAll(() => {
    url = 'http://deadspin.com/the-nationals-are-stuck-with-danny-espinosa-tonight-un-1787706769';
    // The return value of parse is stored without awaiting it, so each test
    // awaits it on its own.
    result = Mercury.parse(
      url,
      fs.readFileSync('./fixtures/deadspin.com/1476389931786.html'),
      { fallback: false }
    );
  });

  it('is selected properly', async () => {
    // The extractor picked for this URL must carry the hostname of the URL
    // as its domain.
    const { domain } = getExtractor(url);
    assert.equal(domain, URL.parse(url).hostname);
  });

  it('returns the title', async () => {
    // Depends on the title selector in
    // ./src/extractors/custom/deadspin.com/index.js.
    const parsed = await result;
    assert.equal(parsed.title, 'The Nationals Are Stuck With Danny Espinosa Tonight, Unless They Opt For The Only Thing Worse');
  });

  it('returns the author', async () => {
    // Depends on the author selector in
    // ./src/extractors/custom/deadspin.com/index.js.
    const parsed = await result;
    assert.equal(parsed.author, 'Chris Thompson');
  });

  it('returns the date_published', async () => {
    // Depends on the date_published selector in
    // ./src/extractors/custom/deadspin.com/index.js.
    const parsed = await result;
    assert.equal(parsed.date_published, '2016-10-13T16:34:00.000Z');
  });

  it('returns the lead_image_url', async () => {
    // Depends on the lead_image_url selector in
    // ./src/extractors/custom/deadspin.com/index.js.
    const parsed = await result;
    assert.equal(parsed.lead_image_url, 'https://i.kinja-img.com/gawker-media/image/upload/s--SUEXWZgf--/c_fill,fl_progressive,g_center,h_450,q_80,w_800/vmeayd7lteyycwzcdlju.jpg');
  });

  it('returns the content', async () => {
    // Depends on the content selector (plus any clean and transform options)
    // in ./src/extractors/custom/deadspin.com/index.js.
    const { content: articleHtml } = await result;

    // Compare only the first 13 words of the text of the first element.
    const $content = cheerio.load(articleHtml || '');
    const leadingWords = excerptContent($content('*').first().text(), 13);

    assert.equal(leadingWords, 'Photo credit: Rob Carr/Getty Washingtons Danny Espinosa problem is inextricably linked to its');
  });
});
it('handles lazy-loaded video', async () => {

@ -9,101 +9,82 @@ import { excerptContent } from 'utils/text';
// Rename CustomExtractor
describe('WikiaExtractor', () => {
it('is selected properly', () => {
// To pass this test, rename your extractor in
// ./src/extractors/custom/fandom.wikia.com/index.js
// (e.g., CustomExtractor => NYTimesExtractor)
// then add your new extractor to
// src/extractors/all.js
const url =
'http://fandom.wikia.com/articles/box-office-good-peculiar';
const extractor = getExtractor(url);
assert.equal(extractor.domain, URL.parse(url).hostname);
});
it('returns the title', async () => {
// To pass this test, fill out the title selector
// in ./src/extractors/custom/fandom.wikia.com/index.js.
const html =
fs.readFileSync('./fixtures/fandom.wikia.com/1475595373938.html');
const articleUrl =
'http://fandom.wikia.com/articles/box-office-good-peculiar';
const { title } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(title, 'Box Office: Its Good to Be Peculiar');
});
it('returns the author', async () => {
// To pass this test, fill out the author selector
// in ./src/extractors/custom/fandom.wikia.com/index.js.
const html =
fs.readFileSync('./fixtures/fandom.wikia.com/1475595373938.html');
const articleUrl =
'http://fandom.wikia.com/articles/box-office-good-peculiar';
const { author } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(author, 'Drew Dietsch');
});
it('returns the date_published', async () => {
// To pass this test, fill out the date_published selector
// in ./src/extractors/custom/fandom.wikia.com/index.js.
const html =
fs.readFileSync('./fixtures/fandom.wikia.com/1475595373938.html');
const articleUrl =
'http://fandom.wikia.com/articles/box-office-good-peculiar';
const { date_published } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(date_published, '2016-10-03T02:30:57.000Z');
});
it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/fandom.wikia.com/index.js.
const html =
fs.readFileSync('./fixtures/fandom.wikia.com/1475595373938.html');
const articleUrl =
'http://fandom.wikia.com/articles/box-office-good-peculiar';
const { lead_image_url } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(lead_image_url, 'http://fandom.wikia.com/wp-content/uploads/2016/10/box-office-peculiar-feature-hero.jpg');
});
it('returns the content', async () => {
// To pass this test, fill out the content selector
// in ./src/extractors/custom/fandom.wikia.com/index.js.
// You may also want to make use of the clean and transform
// options.
const html =
fs.readFileSync('./fixtures/fandom.wikia.com/1475595373938.html');
const url =
'http://fandom.wikia.com/articles/box-office-good-peculiar';
const { content } =
await Mercury.parse(url, html, { fallback: false });
const $ = cheerio.load(content || '');
const first13 = excerptContent($('*').first().text(), 13);
// Update these values with the expected values from
// the article.
assert.equal(first13, 'Tim Burton once again claimed the top spot at the box office. Miss');
// Shared-fixture suite: the saved fandom.wikia.com article is handed to
// Mercury.parse once, and every field check below awaits that same stored
// result.
describe('initial test case', () => {
  let url;
  let result;

  beforeAll(() => {
    url = 'http://fandom.wikia.com/articles/box-office-good-peculiar';
    // The return value of parse is stored without awaiting it, so each test
    // awaits it on its own.
    result = Mercury.parse(
      url,
      fs.readFileSync('./fixtures/fandom.wikia.com/1475595373938.html'),
      { fallback: false }
    );
  });

  it('is selected properly', async () => {
    // Fails until the extractor in
    // ./src/extractors/custom/fandom.wikia.com/index.js has been renamed
    // (e.g., CustomExtractor => NYTimesExtractor) and added to
    // src/extractors/all.js.
    const { domain } = getExtractor(url);
    assert.equal(domain, URL.parse(url).hostname);
  });

  it('returns the title', async () => {
    // Depends on the title selector in
    // ./src/extractors/custom/fandom.wikia.com/index.js.
    const parsed = await result;
    assert.equal(parsed.title, 'Box Office: Its Good to Be Peculiar');
  });

  it('returns the author', async () => {
    // Depends on the author selector in
    // ./src/extractors/custom/fandom.wikia.com/index.js.
    const parsed = await result;
    assert.equal(parsed.author, 'Drew Dietsch');
  });

  it('returns the date_published', async () => {
    // Depends on the date_published selector in
    // ./src/extractors/custom/fandom.wikia.com/index.js.
    const parsed = await result;
    assert.equal(parsed.date_published, '2016-10-03T02:30:57.000Z');
  });

  it('returns the lead_image_url', async () => {
    // Depends on the lead_image_url selector in
    // ./src/extractors/custom/fandom.wikia.com/index.js.
    const parsed = await result;
    assert.equal(parsed.lead_image_url, 'http://fandom.wikia.com/wp-content/uploads/2016/10/box-office-peculiar-feature-hero.jpg');
  });

  it('returns the content', async () => {
    // Depends on the content selector (plus any clean and transform options)
    // in ./src/extractors/custom/fandom.wikia.com/index.js.
    const { content: articleHtml } = await result;

    // Compare only the first 13 words of the text of the first element.
    const $content = cheerio.load(articleHtml || '');
    const leadingWords = excerptContent($content('*').first().text(), 13);

    assert.equal(leadingWords, 'Tim Burton once again claimed the top spot at the box office. Miss');
  });
});
});

@ -8,98 +8,73 @@ import getExtractor from 'extractors/get-extractor';
import { excerptContent } from 'utils/text';
describe('MediumExtractor', () => {
it('is selected properly', () => {
// To pass this test, rename your extractor in
// ./src/extractors/custom/medium.com/index.js
// (e.g., CustomExtractor => NYTimesExtractor)
// then add your new extractor to
// src/extractors/all.js
const url =
'https://medium.com/the-wtf-economy/wtf-whats-the-future-e52ab9515573#.ilwrgwsks';
const extractor = getExtractor(url);
assert.equal(extractor.domain, URL.parse(url).hostname);
});
it('returns the title', async () => {
const html =
fs.readFileSync('./fixtures/medium.com/1477523363921.html');
const articleUrl =
'https://medium.com/the-wtf-economy/wtf-whats-the-future-e52ab9515573#.ilwrgwsks';
const { title } =
await Mercury.parse(articleUrl, html, { fallback: false });
assert.equal(title, 'WTF? Whats The Future?');
});
it('returns the author', async () => {
const html =
fs.readFileSync('./fixtures/medium.com/1477523363921.html');
const articleUrl =
'https://medium.com/the-wtf-economy/wtf-whats-the-future-e52ab9515573#.ilwrgwsks';
const { author } =
await Mercury.parse(articleUrl, html, { fallback: false });
assert.equal(author, 'Tim O\'Reilly');
});
it('returns the date_published', async () => {
const html =
fs.readFileSync('./fixtures/medium.com/1477523363921.html');
const articleUrl =
'https://medium.com/the-wtf-economy/wtf-whats-the-future-e52ab9515573#.ilwrgwsks';
const { date_published } =
await Mercury.parse(articleUrl, html, { fallback: false });
assert.equal(date_published, '2016-10-19T14:24:20.323Z');
});
it('returns the dek', async () => {
const html =
fs.readFileSync('./fixtures/medium.com/1477523363921.html');
const articleUrl =
'https://medium.com/the-wtf-economy/wtf-whats-the-future-e52ab9515573#.ilwrgwsks';
const { dek } =
await Mercury.parse(articleUrl, html, { fallback: false });
assert.equal(dek, null);
});
it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/medium.com/index.js.
const html =
fs.readFileSync('./fixtures/medium.com/1477523363921.html');
const articleUrl =
'https://medium.com/the-wtf-economy/wtf-whats-the-future-e52ab9515573#.ilwrgwsks';
const { lead_image_url } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(lead_image_url, 'https://cdn-images-1.medium.com/max/1200/1*3Gzaug9mRc8vvx1cuQWkog.png');
});
it('returns the content', async () => {
const html =
fs.readFileSync('./fixtures/medium.com/1477523363921.html');
const url =
'https://medium.com/the-wtf-economy/wtf-whats-the-future-e52ab9515573#.ilwrgwsks';
const { content } =
await Mercury.parse(url, html, { fallback: false });
const $ = cheerio.load(content || '');
const first13 = excerptContent($('*').first().text(), 13);
// testing that youtube video transform is working
assert.equal(/IAoy3ia2ivI/.test(content), true);
assert.equal(first13, 'Video of WTF? My talk at the White House Frontiers ConferenceLast Thursday, I');
// Shared-fixture suite: the saved Medium article is handed to Mercury.parse
// once, and every field check below awaits that same stored result.
describe('initial test case', () => {
  let url;
  let result;

  beforeAll(() => {
    url = 'https://medium.com/the-wtf-economy/wtf-whats-the-future-e52ab9515573#.ilwrgwsks';
    // The return value of parse is stored without awaiting it, so each test
    // awaits it on its own.
    result = Mercury.parse(
      url,
      fs.readFileSync('./fixtures/medium.com/1477523363921.html'),
      { fallback: false }
    );
  });

  it('is selected properly', async () => {
    // Fails until the extractor in
    // ./src/extractors/custom/medium.com/index.js has been renamed
    // (e.g., CustomExtractor => NYTimesExtractor) and added to
    // src/extractors/all.js.
    const { domain } = getExtractor(url);
    assert.equal(domain, URL.parse(url).hostname);
  });

  it('returns the title', async () => {
    const parsed = await result;
    assert.equal(parsed.title, 'WTF? Whats The Future?');
  });

  it('returns the author', async () => {
    const parsed = await result;
    assert.equal(parsed.author, 'Tim O\'Reilly');
  });

  it('returns the date_published', async () => {
    const parsed = await result;
    assert.equal(parsed.date_published, '2016-10-19T14:24:20.323Z');
  });

  it('returns the dek', async () => {
    const parsed = await result;
    assert.equal(parsed.dek, null);
  });

  it('returns the lead_image_url', async () => {
    // Depends on the lead_image_url selector in
    // ./src/extractors/custom/medium.com/index.js.
    const parsed = await result;
    assert.equal(parsed.lead_image_url, 'https://cdn-images-1.medium.com/max/1200/1*3Gzaug9mRc8vvx1cuQWkog.png');
  });

  it('returns the content', async () => {
    const { content: articleHtml } = await result;

    // Compare only the first 13 words of the text of the first element.
    const $content = cheerio.load(articleHtml || '');
    const leadingWords = excerptContent($content('*').first().text(), 13);

    // testing that youtube video transform is working
    assert.equal(/IAoy3ia2ivI/.test(articleHtml), true);
    assert.equal(leadingWords, 'Video of WTF? My talk at the White House Frontiers ConferenceLast Thursday, I');
  });
});
});

@ -8,115 +8,89 @@ import getExtractor from 'extractors/get-extractor';
import { excerptContent } from 'utils/text';
describe('MoneyCnnComExtractor', () => {
it('is selected properly', () => {
// This test should be passing by default.
// It sanity checks that the correct parser
// is being selected for URLs from this domain
const url =
'http://money.cnn.com/2016/11/29/news/ohare-workers-strike/index.html';
const extractor = getExtractor(url);
assert.equal(extractor.domain, URL.parse(url).hostname);
});
it('returns the title', async () => {
// To pass this test, fill out the title selector
// in ./src/extractors/custom/money.cnn.com/index.js.
const html =
fs.readFileSync('./fixtures/money.cnn.com/1480437611330.html');
const articleUrl =
'http://money.cnn.com/2016/11/29/news/ohare-workers-strike/index.html';
const { title } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(title, 'Hundreds of Chicago O\'Hare airport workers go on strike');
});
it('returns the author', async () => {
// To pass this test, fill out the author selector
// in ./src/extractors/custom/money.cnn.com/index.js.
const html =
fs.readFileSync('./fixtures/money.cnn.com/1480437611330.html');
const articleUrl =
'http://money.cnn.com/2016/11/29/news/ohare-workers-strike/index.html';
const { author } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(author, 'Julia Horowitz');
});
it('returns the date_published', async () => {
// To pass this test, fill out the date_published selector
// in ./src/extractors/custom/money.cnn.com/index.js.
const html =
fs.readFileSync('./fixtures/money.cnn.com/1480437611330.html');
const articleUrl =
'http://money.cnn.com/2016/11/29/news/ohare-workers-strike/index.html';
const { date_published } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(date_published, '2016-11-29T03:33:08.000Z');
});
it('returns the dek', async () => {
// To pass this test, fill out the dek selector
// in ./src/extractors/custom/money.cnn.com/index.js.
const html =
fs.readFileSync('./fixtures/money.cnn.com/1480437611330.html');
const articleUrl =
'http://money.cnn.com/2016/11/29/news/ohare-workers-strike/index.html';
const { dek } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(dek, 'Heads up, travelers: Hundreds of workers are striking at Chicago O\'Hare International Airport on Tuesday.');
});
it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/money.cnn.com/index.js.
const html =
fs.readFileSync('./fixtures/money.cnn.com/1480437611330.html');
const articleUrl =
'http://money.cnn.com/2016/11/29/news/ohare-workers-strike/index.html';
const { lead_image_url } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(lead_image_url, 'http://i2.cdn.turner.com/money/dam/assets/161118102423-ohare-airport-strike-780x439.jpg');
});
it('returns the content', async () => {
// To pass this test, fill out the content selector
// in ./src/extractors/custom/money.cnn.com/index.js.
// You may also want to make use of the clean and transform
// options.
const html =
fs.readFileSync('./fixtures/money.cnn.com/1480437611330.html');
const url =
'http://money.cnn.com/2016/11/29/news/ohare-workers-strike/index.html';
const { content } =
await Mercury.parse(url, html, { fallback: false });
const $ = cheerio.load(content || '');
const first13 = excerptContent($('*').first().text(), 13);
// Update these values with the expected values from
// the article.
assert.equal(first13, 'Janitors, baggage handlers, cabin cleaners and wheelchair attendants are asking for a $15');
// Field-level checks for the money.cnn.com fixture. The page is handed to
// Mercury.parse once in beforeAll; every test awaits that same promise.
describe('initial test case', () => {
  let articleUrl;
  let parsing;

  beforeAll(() => {
    articleUrl =
      'http://money.cnn.com/2016/11/29/news/ohare-workers-strike/index.html';
    const fixture =
      fs.readFileSync('./fixtures/money.cnn.com/1480437611330.html');
    // Not awaited here on purpose: the tests below await the shared promise.
    parsing = Mercury.parse(articleUrl, fixture, { fallback: false });
  });

  it('is selected properly', async () => {
    // Expected to pass out of the box: confirms that the extractor chosen
    // for this URL reports the URL's own hostname as its domain.
    const { domain } = getExtractor(articleUrl);
    assert.equal(domain, URL.parse(articleUrl).hostname);
  });

  it('returns the title', async () => {
    // Depends on the title selector in
    // ./src/extractors/custom/money.cnn.com/index.js.
    const parsed = await parsing;
    // Expected value is taken from the fixture article.
    assert.equal(parsed.title, 'Hundreds of Chicago O\'Hare airport workers go on strike');
  });

  it('returns the author', async () => {
    // Depends on the author selector in
    // ./src/extractors/custom/money.cnn.com/index.js.
    const parsed = await parsing;
    // Expected value is taken from the fixture article.
    assert.equal(parsed.author, 'Julia Horowitz');
  });

  it('returns the date_published', async () => {
    // Depends on the date_published selector in
    // ./src/extractors/custom/money.cnn.com/index.js.
    const parsed = await parsing;
    // Expected value is taken from the fixture article.
    assert.equal(parsed.date_published, '2016-11-29T03:33:08.000Z');
  });

  it('returns the dek', async () => {
    // Depends on the dek selector in
    // ./src/extractors/custom/money.cnn.com/index.js.
    const parsed = await parsing;
    // Expected value is taken from the fixture article.
    assert.equal(parsed.dek, 'Heads up, travelers: Hundreds of workers are striking at Chicago O\'Hare International Airport on Tuesday.');
  });

  it('returns the lead_image_url', async () => {
    // Depends on the lead_image_url selector in
    // ./src/extractors/custom/money.cnn.com/index.js.
    const parsed = await parsing;
    // Expected value is taken from the fixture article.
    assert.equal(parsed.lead_image_url, 'http://i2.cdn.turner.com/money/dam/assets/161118102423-ohare-airport-strike-780x439.jpg');
  });

  it('returns the content', async () => {
    // Depends on the content selector (and possibly the clean/transform
    // options) in ./src/extractors/custom/money.cnn.com/index.js.
    const parsed = await parsing;
    // Missing content falls back to an empty document.
    const $ = cheerio.load(parsed.content || '');
    const firstNode = $('*').first();
    // Only a short excerpt of the extracted text is compared.
    const first13 = excerptContent(firstNode.text(), 13);
    assert.equal(first13, 'Janitors, baggage handlers, cabin cleaners and wheelchair attendants are asking for a $15');
  });
});
});

@ -8,169 +8,137 @@ import getExtractor from 'extractors/get-extractor';
import { excerptContent } from 'utils/text';
describe('NewrepublicComExtractor', () => {
it('is selected properly', () => {
// This test should be passing by default.
// It sanity checks that the correct parser
// is being selected for URLs from this domain
const url =
'https://newrepublic.com/article/138859/fantastic-beasts-nice-place-visit';
const extractor = getExtractor(url);
assert.equal(extractor.domain, URL.parse(url).hostname);
// Field-level checks for the newrepublic.com article fixture. The page is
// handed to Mercury.parse once in beforeAll; every test awaits that promise.
describe('initial test case', () => {
  let articleUrl;
  let parsing;

  beforeAll(() => {
    articleUrl =
      'https://newrepublic.com/article/138859/fantastic-beasts-nice-place-visit';
    const fixture =
      fs.readFileSync('./fixtures/newrepublic.com/1480434805231.html');
    // Not awaited here on purpose: the tests below await the shared promise.
    parsing = Mercury.parse(articleUrl, fixture, { fallback: false });
  });

  it('is selected properly', async () => {
    // Expected to pass out of the box: confirms that the extractor chosen
    // for this URL reports the URL's own hostname as its domain.
    const { domain } = getExtractor(articleUrl);
    assert.equal(domain, URL.parse(articleUrl).hostname);
  });

  it('article returns the title', async () => {
    // Depends on the title selector in
    // ./src/extractors/custom/newrepublic.com/index.js.
    const parsed = await parsing;
    // Expected value is taken from the fixture article.
    assert.equal(parsed.title, 'Fantastic Beasts: A Nice Place to Visit');
  });

  it('returns the author', async () => {
    // Depends on the author selector in
    // ./src/extractors/custom/newrepublic.com/index.js.
    const parsed = await parsing;
    // Expected value is taken from the fixture article.
    assert.equal(parsed.author, 'Will Leitch');
  });

  it('returns the date_published', async () => {
    // Depends on the date_published selector in
    // ./src/extractors/custom/newrepublic.com/index.js.
    const parsed = await parsing;
    // Expected value is taken from the fixture article.
    assert.equal(parsed.date_published, '2016-11-18T05:00:00.000Z');
  });

  it('returns the dek', async () => {
    // Depends on the dek selector in
    // ./src/extractors/custom/newrepublic.com/index.js.
    const parsed = await parsing;
    // Expected value is taken from the fixture article.
    assert.equal(parsed.dek, 'The glorious world-building in the first Harry Potter spin-off isn\'t enough to keep viewers coming back.');
  });

  it('returns the lead_image_url', async () => {
    // Depends on the lead_image_url selector in
    // ./src/extractors/custom/newrepublic.com/index.js.
    const parsed = await parsing;
    // Expected value is taken from the fixture article.
    assert.equal(parsed.lead_image_url, 'https://images.newrepublic.com/29020c1e6b108813cf65b54487ad2b5a65aa6079.jpeg?w=1109&h=577&crop=faces&fit=crop&fm=jpg');
  });

  it('article returns the content', async () => {
    // Depends on the content selector (and possibly the clean/transform
    // options) in ./src/extractors/custom/newrepublic.com/index.js.
    const parsed = await parsing;
    // Missing content falls back to an empty document.
    const $ = cheerio.load(parsed.content || '');
    const firstNode = $('*').first();
    // Only a short excerpt of the extracted text is compared.
    const first13 = excerptContent(firstNode.text(), 13);
    assert.equal(first13, 'The eight Harry Potter films, which stretched out over nearly a decade, had');
  });
});
it('article returns the title', async () => {
// To pass this test, fill out the title selector
// in ./src/extractors/custom/newrepublic.com/index.js.
const html =
fs.readFileSync('./fixtures/newrepublic.com/1480434805231.html');
const articleUrl =
'https://newrepublic.com/article/138859/fantastic-beasts-nice-place-visit';
const { title } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(title, 'Fantastic Beasts: A Nice Place to Visit');
});
it('minute returns the title', async () => {
// To pass this test, fill out the title selector
// in ./src/extractors/custom/newrepublic.com/index.js.
const html =
fs.readFileSync('./fixtures/newrepublic.com/1480446502259.html');
const articleUrl =
'https://newrepublic.com/minutes/139022/maybe-donald-trumps-twitter-account-just-smoke-screen';
const { title } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(title, 'Maybe Donald Trumps Twitter account is more than just a smoke screen.');
});
it('article returns the author', async () => {
// To pass this test, fill out the author selector
// in ./src/extractors/custom/newrepublic.com/index.js.
const html =
fs.readFileSync('./fixtures/newrepublic.com/1480446502259.html');
const articleUrl =
'https://newrepublic.com/minutes/139022/maybe-donald-trumps-twitter-account-just-smoke-screen';
const { author } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(author, 'Alex Shephard');
});
it('minute returns the author', async () => {
// To pass this test, fill out the author selector
// in ./src/extractors/custom/newrepublic.com/index.js.
const html =
fs.readFileSync('./fixtures/newrepublic.com/1480434805231.html');
const articleUrl =
'https://newrepublic.com/article/138859/fantastic-beasts-nice-place-visit';
const { author } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(author, 'Will Leitch');
});
it('returns the date_published', async () => {
// To pass this test, fill out the date_published selector
// in ./src/extractors/custom/newrepublic.com/index.js.
const html =
fs.readFileSync('./fixtures/newrepublic.com/1480434805231.html');
const articleUrl =
'https://newrepublic.com/article/138859/fantastic-beasts-nice-place-visit';
const { date_published } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(date_published, '2016-11-18T05:00:00.000Z');
});
it('returns the dek', async () => {
// To pass this test, fill out the dek selector
// in ./src/extractors/custom/newrepublic.com/index.js.
const html =
fs.readFileSync('./fixtures/newrepublic.com/1480434805231.html');
const articleUrl =
'https://newrepublic.com/article/138859/fantastic-beasts-nice-place-visit';
const { dek } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(dek, 'The glorious world-building in the first Harry Potter spin-off isn\'t enough to keep viewers coming back.');
});
it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/newrepublic.com/index.js.
const html =
fs.readFileSync('./fixtures/newrepublic.com/1480434805231.html');
const articleUrl =
'https://newrepublic.com/article/138859/fantastic-beasts-nice-place-visit';
const { lead_image_url } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(lead_image_url, 'https://images.newrepublic.com/29020c1e6b108813cf65b54487ad2b5a65aa6079.jpeg?w=1109&h=577&crop=faces&fit=crop&fm=jpg');
});
it('article returns the content', async () => {
// To pass this test, fill out the content selector
// in ./src/extractors/custom/newrepublic.com/index.js.
// You may also want to make use of the clean and transform
// options.
const html =
fs.readFileSync('./fixtures/newrepublic.com/1480434805231.html');
const url =
'https://newrepublic.com/article/138859/fantastic-beasts-nice-place-visit';
const { content } =
await Mercury.parse(url, html, { fallback: false });
const $ = cheerio.load(content || '');
const first13 = excerptContent($('*').first().text(), 13);
// Update these values with the expected values from
// the article.
assert.equal(first13, 'The eight Harry Potter films, which stretched out over nearly a decade, had');
});
it('minute returns the content', async () => {
// To pass this test, fill out the content selector
// in ./src/extractors/custom/newrepublic.com/index.js.
// You may also want to make use of the clean and transform
// options.
const html =
fs.readFileSync('./fixtures/newrepublic.com/1480446502259.html');
const url =
'https://newrepublic.com/minutes/139022/maybe-donald-trumps-twitter-account-just-smoke-screen';
const { content } =
await Mercury.parse(url, html, { fallback: false });
const $ = cheerio.load(content || '');
const first13 = excerptContent($('*').first().text(), 13);
// Update these values with the expected values from
// the article.
assert.equal(first13, 'Its been one of the most persistent narratives of the last year: Whenever');
// Field-level checks for the newrepublic.com "minutes" fixture.
//
// The suite callback is synchronous: a describe callback must register its
// tests synchronously, so it must not be an async function (which would
// return a Promise). All asynchronous work lives in beforeAll instead.
describe('minutes', () => {
  // Resolved parse output, populated once by beforeAll.
  let result;

  beforeAll(async () => {
    const url =
      'https://newrepublic.com/minutes/139022/maybe-donald-trumps-twitter-account-just-smoke-screen';
    const html =
      fs.readFileSync('./fixtures/newrepublic.com/1480446502259.html');
    // Awaited here so a parse failure surfaces in the hook and the tests
    // below can read plain fields without awaiting again.
    result = await Mercury.parse(url, html, { fallback: false });
  });

  it('minute returns the title', () => {
    // To pass this test, fill out the title selector
    // in ./src/extractors/custom/newrepublic.com/index.js.
    const { title } = result;

    // Update these values with the expected values from
    // the article.
    assert.equal(title, 'Maybe Donald Trumps Twitter account is more than just a smoke screen.');
  });

  // Previously titled 'article returns the author' even though it checks
  // the minutes fixture.
  it('minute returns the author', () => {
    // To pass this test, fill out the author selector
    // in ./src/extractors/custom/newrepublic.com/index.js.
    const { author } = result;

    // Update these values with the expected values from
    // the article.
    assert.equal(author, 'Alex Shephard');
  });

  it('minute returns the content', () => {
    // To pass this test, fill out the content selector
    // in ./src/extractors/custom/newrepublic.com/index.js.
    // You may also want to make use of the clean and transform
    // options.
    const { content } = result;

    // Missing content falls back to an empty document.
    const $ = cheerio.load(content || '');

    const first13 = excerptContent($('*').first().text(), 13);

    // Update these values with the expected values from
    // the article.
    assert.equal(first13, 'Its been one of the most persistent narratives of the last year: Whenever');
  });
});
});

@ -8,99 +8,79 @@ import getExtractor from 'extractors/get-extractor';
import { excerptContent } from 'utils/text';
describe('WwwAolComExtractor', () => {
it('is selected properly', () => {
// This test should be passing by default.
// It sanity checks that the correct parser
// is being selected for URLs from this domain
const url =
'http://www.aol.com/article/news/2016/12/01/son-of-slain-police-officer-given-teddy-bears-made-from-dads-un/21618553/';
const extractor = getExtractor(url);
assert.equal(extractor.domain, URL.parse(url).hostname);
});
it('returns the title', async () => {
// To pass this test, fill out the title selector
// in ./src/extractors/custom/www.aol.com/index.js.
const html =
fs.readFileSync('./fixtures/www.aol.com/1480618816916.html');
const articleUrl =
'http://www.aol.com/article/news/2016/12/01/son-of-slain-police-officer-given-teddy-bears-made-from-dads-un/21618553/';
const { title } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(title, 'Son of slain police officer given teddy bears made from dad\'s uniform');
});
it('returns the author', async () => {
// To pass this test, fill out the author selector
// in ./src/extractors/custom/www.aol.com/index.js.
const html =
fs.readFileSync('./fixtures/www.aol.com/1480618816916.html');
const articleUrl =
'http://www.aol.com/article/news/2016/12/01/son-of-slain-police-officer-given-teddy-bears-made-from-dads-un/21618553/';
const { author } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(author, 'AOL Staff');
});
it('returns the date_published', async () => {
// To pass this test, fill out the date_published selector
// in ./src/extractors/custom/www.aol.com/index.js.
const html =
fs.readFileSync('./fixtures/www.aol.com/1480618816916.html');
const articleUrl =
'http://www.aol.com/article/news/2016/12/01/son-of-slain-police-officer-given-teddy-bears-made-from-dads-un/21618553/';
const { date_published } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(date_published, '2016-12-01T18:01:00.000Z');
});
it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/www.aol.com/index.js.
const html =
fs.readFileSync('./fixtures/www.aol.com/1480618816916.html');
const articleUrl =
'http://www.aol.com/article/news/2016/12/01/son-of-slain-police-officer-given-teddy-bears-made-from-dads-un/21618553/';
const { lead_image_url } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(lead_image_url, 'http://o.aolcdn.com/dims-shared/dims3/GLOB/crop/475x312+0+0/resize/1028x675!/format/jpg/quality/85/http%3A%2F%2Fo.aolcdn.com%2Fhss%2Fstorage%2Fmidas%2Fc8242ab14e089c284b031379d025d64%2F204656928%2FScreen%2BShot%2B2016-12-01%2Bat%2B1.15.51%2BPM.png');
});
it('returns the content', async () => {
// To pass this test, fill out the content selector
// in ./src/extractors/custom/www.aol.com/index.js.
// You may also want to make use of the clean and transform
// options.
const html =
fs.readFileSync('./fixtures/www.aol.com/1480618816916.html');
const url =
'http://www.aol.com/article/news/2016/12/01/son-of-slain-police-officer-given-teddy-bears-made-from-dads-un/21618553/';
const { content } =
await Mercury.parse(url, html, { fallback: false });
const $ = cheerio.load(content || '');
const first13 = excerptContent($('*').first().text(), 13);
// Update these values with the expected values from
// the article.
assert.equal(first13, 'ST. LOUIS, MO (KTVI) Amid unimaginable grief, the widow of slain Saint');
// Field-level checks for the www.aol.com fixture. The page is handed to
// Mercury.parse once in beforeAll; every test awaits that same promise.
describe('initial test case', () => {
  let articleUrl;
  let parsing;

  beforeAll(() => {
    articleUrl =
      'http://www.aol.com/article/news/2016/12/01/son-of-slain-police-officer-given-teddy-bears-made-from-dads-un/21618553/';
    const fixture =
      fs.readFileSync('./fixtures/www.aol.com/1480618816916.html');
    // Not awaited here on purpose: the tests below await the shared promise.
    parsing = Mercury.parse(articleUrl, fixture, { fallback: false });
  });

  it('is selected properly', async () => {
    // Expected to pass out of the box: confirms that the extractor chosen
    // for this URL reports the URL's own hostname as its domain.
    const { domain } = getExtractor(articleUrl);
    assert.equal(domain, URL.parse(articleUrl).hostname);
  });

  it('returns the title', async () => {
    // Depends on the title selector in
    // ./src/extractors/custom/www.aol.com/index.js.
    const parsed = await parsing;
    // Expected value is taken from the fixture article.
    assert.equal(parsed.title, 'Son of slain police officer given teddy bears made from dad\'s uniform');
  });

  it('returns the author', async () => {
    // Depends on the author selector in
    // ./src/extractors/custom/www.aol.com/index.js.
    const parsed = await parsing;
    // Expected value is taken from the fixture article.
    assert.equal(parsed.author, 'AOL Staff');
  });

  it('returns the date_published', async () => {
    // Depends on the date_published selector in
    // ./src/extractors/custom/www.aol.com/index.js.
    const parsed = await parsing;
    // Expected value is taken from the fixture article.
    assert.equal(parsed.date_published, '2016-12-01T18:01:00.000Z');
  });

  it('returns the lead_image_url', async () => {
    // Depends on the lead_image_url selector in
    // ./src/extractors/custom/www.aol.com/index.js.
    const parsed = await parsing;
    // Expected value is taken from the fixture article.
    assert.equal(parsed.lead_image_url, 'http://o.aolcdn.com/dims-shared/dims3/GLOB/crop/475x312+0+0/resize/1028x675!/format/jpg/quality/85/http%3A%2F%2Fo.aolcdn.com%2Fhss%2Fstorage%2Fmidas%2Fc8242ab14e089c284b031379d025d64%2F204656928%2FScreen%2BShot%2B2016-12-01%2Bat%2B1.15.51%2BPM.png');
  });

  it('returns the content', async () => {
    // Depends on the content selector (and possibly the clean/transform
    // options) in ./src/extractors/custom/www.aol.com/index.js.
    const parsed = await parsing;
    // Missing content falls back to an empty document.
    const $ = cheerio.load(parsed.content || '');
    const firstNode = $('*').first();
    // Only a short excerpt of the extracted text is compared.
    const first13 = excerptContent(firstNode.text(), 13);
    assert.equal(first13, 'ST. LOUIS, MO (KTVI) Amid unimaginable grief, the widow of slain Saint');
  });
});
});

@ -8,102 +8,82 @@ import getExtractor from 'extractors/get-extractor';
import { excerptContent } from 'utils/text';
// Rename CustomExtractor
describe('CustomExtractor', () => {
it('is selected properly', () => {
// To pass this test, rename your extractor in
// ./src/extractors/custom/www.apartmenttherapy.com/index.js
// (e.g., CustomExtractor => NYTimesExtractor)
// then add your new extractor to
// src/extractors/all.js
const url =
'http://www.apartmenttherapy.com/a-light-filled-la-loft-236564';
const extractor = getExtractor(url);
assert.equal(extractor.domain, URL.parse(url).hostname);
});
it('returns the title', async () => {
// To pass this test, fill out the title selector
// in ./src/extractors/custom/www.apartmenttherapy.com/index.js.
const html =
fs.readFileSync('./fixtures/www.apartmenttherapy.com/1476396697639.html');
const articleUrl =
'http://www.apartmenttherapy.com/a-light-filled-la-loft-236564';
const { title } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(title, 'A Light Filled LA Loft');
});
it('returns the author', async () => {
// To pass this test, fill out the author selector
// in ./src/extractors/custom/www.apartmenttherapy.com/index.js.
const html =
fs.readFileSync('./fixtures/www.apartmenttherapy.com/1476396697639.html');
const articleUrl =
'http://www.apartmenttherapy.com/a-light-filled-la-loft-236564';
const { author } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(author, 'Apartment Therapy Submissions');
});
it('returns the date_published', async () => {
// To pass this test, fill out the date_published selector
// in ./src/extractors/custom/www.apartmenttherapy.com/index.js.
const html =
fs.readFileSync('./fixtures/www.apartmenttherapy.com/1476396697639.html');
const articleUrl =
'http://www.apartmenttherapy.com/a-light-filled-la-loft-236564';
const { date_published } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(date_published, '2016-10-13T21:00:00.000Z');
});
it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/www.apartmenttherapy.com/index.js.
const html =
fs.readFileSync('./fixtures/www.apartmenttherapy.com/1476396697639.html');
const articleUrl =
'http://www.apartmenttherapy.com/a-light-filled-la-loft-236564';
const { lead_image_url } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(lead_image_url, 'http://atmedia.imgix.net/9332fdca908b1fcc5c9a6891b458820718239950?w=1500&fit=max');
});
it('returns the content', async () => {
// To pass this test, fill out the content selector
// in ./src/extractors/custom/www.apartmenttherapy.com/index.js.
// You may also want to make use of the clean and transform
// options.
const html =
fs.readFileSync('./fixtures/www.apartmenttherapy.com/1476396697639.html');
const url =
'http://www.apartmenttherapy.com/a-light-filled-la-loft-236564';
const { content } =
await Mercury.parse(url, html, { fallback: false });
const $ = cheerio.load(content || '');
const first13 = excerptContent($('*').first().text(), 13);
// Update these values with the expected values from
// the article.
assert.equal(first13, 'Name: Ashley Location: Downtown — Los Angeles, California Welcome to our sunny and');
describe('ApartmentTherapyExtractor', () => {
// Field-level checks for the www.apartmenttherapy.com fixture. The page is
// handed to Mercury.parse once in beforeAll; every test awaits that promise.
describe('initial test case', () => {
  let articleUrl;
  let parsing;

  beforeAll(() => {
    articleUrl =
      'http://www.apartmenttherapy.com/a-light-filled-la-loft-236564';
    const fixture =
      fs.readFileSync('./fixtures/www.apartmenttherapy.com/1476396697639.html');
    // Not awaited here on purpose: the tests below await the shared promise.
    parsing = Mercury.parse(articleUrl, fixture, { fallback: false });
  });

  it('is selected properly', async () => {
    // Passing requires the extractor in
    // ./src/extractors/custom/www.apartmenttherapy.com/index.js
    // to be renamed from the CustomExtractor placeholder
    // (e.g., CustomExtractor => NYTimesExtractor) and added to
    // src/extractors/all.js.
    const { domain } = getExtractor(articleUrl);
    assert.equal(domain, URL.parse(articleUrl).hostname);
  });

  it('returns the title', async () => {
    // Depends on the title selector in
    // ./src/extractors/custom/www.apartmenttherapy.com/index.js.
    const parsed = await parsing;
    // Expected value is taken from the fixture article.
    assert.equal(parsed.title, 'A Light Filled LA Loft');
  });

  it('returns the author', async () => {
    // Depends on the author selector in
    // ./src/extractors/custom/www.apartmenttherapy.com/index.js.
    const parsed = await parsing;
    // Expected value is taken from the fixture article.
    assert.equal(parsed.author, 'Apartment Therapy Submissions');
  });

  it('returns the date_published', async () => {
    // Depends on the date_published selector in
    // ./src/extractors/custom/www.apartmenttherapy.com/index.js.
    const parsed = await parsing;
    // Expected value is taken from the fixture article.
    assert.equal(parsed.date_published, '2016-10-13T21:00:00.000Z');
  });

  it('returns the lead_image_url', async () => {
    // Depends on the lead_image_url selector in
    // ./src/extractors/custom/www.apartmenttherapy.com/index.js.
    const parsed = await parsing;
    // Expected value is taken from the fixture article.
    assert.equal(parsed.lead_image_url, 'http://atmedia.imgix.net/9332fdca908b1fcc5c9a6891b458820718239950?w=1500&fit=max');
  });

  it('returns the content', async () => {
    // Depends on the content selector (and possibly the clean/transform
    // options) in ./src/extractors/custom/www.apartmenttherapy.com/index.js.
    const parsed = await parsing;
    // Missing content falls back to an empty document.
    const $ = cheerio.load(parsed.content || '');
    const firstNode = $('*').first();
    // Only a short excerpt of the extracted text is compared.
    const first13 = excerptContent(firstNode.text(), 13);
    assert.equal(first13, 'Name: Ashley Location: Downtown — Los Angeles, California Welcome to our sunny and');
  });
});
});

@ -9,101 +9,81 @@ import { excerptContent } from 'utils/text';
// TODO: rename the 'CustomExtractor' suite below to match this site's extractor.
describe('CustomExtractor', () => {
it('is selected properly', () => {
// To pass this test, rename your extractor in
// ./src/extractors/custom/www.broadwayworld.com/index.js
// (e.g., CustomExtractor => NYTimesExtractor)
// then add your new extractor to
// src/extractors/all.js
const url =
'http://www.broadwayworld.com/article/American-Theatre-Wing-Launches-Andrew-Lloyd-Webber-Training-Scholarships-20161013';
const extractor = getExtractor(url);
assert.equal(extractor.domain, URL.parse(url).hostname);
});
it('returns the title', async () => {
// To pass this test, fill out the title selector
// in ./src/extractors/custom/www.broadwayworld.com/index.js.
const html =
fs.readFileSync('./fixtures/www.broadwayworld.com/1476392567143.html');
const articleUrl =
'http://www.broadwayworld.com/article/American-Theatre-Wing-Launches-Andrew-Lloyd-Webber-Training-Scholarships-20161013';
const { title } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(title, 'American Theatre Wing Launches Andrew Lloyd Webber Training Scholarships');
});
it('returns the author', async () => {
// To pass this test, fill out the author selector
// in ./src/extractors/custom/www.broadwayworld.com/index.js.
const html =
fs.readFileSync('./fixtures/www.broadwayworld.com/1476392567143.html');
const articleUrl =
'http://www.broadwayworld.com/article/American-Theatre-Wing-Launches-Andrew-Lloyd-Webber-Training-Scholarships-20161013';
const { author } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(author, 'BWW News Desk');
});
it('returns the date_published', async () => {
// To pass this test, fill out the date_published selector
// in ./src/extractors/custom/www.broadwayworld.com/index.js.
const html =
fs.readFileSync('./fixtures/www.broadwayworld.com/1476392567143.html');
const articleUrl =
'http://www.broadwayworld.com/article/American-Theatre-Wing-Launches-Andrew-Lloyd-Webber-Training-Scholarships-20161013';
const { date_published } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(date_published, '2016-10-13T19:35:00.000Z');
});
it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/www.broadwayworld.com/index.js.
const html =
fs.readFileSync('./fixtures/www.broadwayworld.com/1476392567143.html');
const articleUrl =
'http://www.broadwayworld.com/article/American-Theatre-Wing-Launches-Andrew-Lloyd-Webber-Training-Scholarships-20161013';
const { lead_image_url } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(lead_image_url, 'https://images.bwwstatic.com/columnpic7/7B5FD766-A644-E386-19DE07017A3AD79C.jpg');
});
it('returns the content', async () => {
// To pass this test, fill out the content selector
// in ./src/extractors/custom/www.broadwayworld.com/index.js.
// You may also want to make use of the clean and transform
// options.
const html =
fs.readFileSync('./fixtures/www.broadwayworld.com/1476392567143.html');
const url =
'http://www.broadwayworld.com/article/American-Theatre-Wing-Launches-Andrew-Lloyd-Webber-Training-Scholarships-20161013';
const { content } =
await Mercury.parse(url, html, { fallback: false });
const $ = cheerio.load(content || '');
const first13 = excerptContent($('*').first().text(), 13);
// Update these values with the expected values from
// the article.
assert.equal(first13, 'The American Theatre Wing announced today that their Andrew Lloyd Webber Initiative has');
// Field-level checks for the www.broadwayworld.com fixture. The page is
// handed to Mercury.parse once in beforeAll; every test awaits that promise.
describe('initial test case', () => {
  let articleUrl;
  let parsing;

  beforeAll(() => {
    articleUrl =
      'http://www.broadwayworld.com/article/American-Theatre-Wing-Launches-Andrew-Lloyd-Webber-Training-Scholarships-20161013';
    const fixture =
      fs.readFileSync('./fixtures/www.broadwayworld.com/1476392567143.html');
    // Not awaited here on purpose: the tests below await the shared promise.
    parsing = Mercury.parse(articleUrl, fixture, { fallback: false });
  });

  it('is selected properly', async () => {
    // Passing requires the extractor in
    // ./src/extractors/custom/www.broadwayworld.com/index.js
    // to be renamed from the CustomExtractor placeholder
    // (e.g., CustomExtractor => NYTimesExtractor) and added to
    // src/extractors/all.js.
    const { domain } = getExtractor(articleUrl);
    assert.equal(domain, URL.parse(articleUrl).hostname);
  });

  it('returns the title', async () => {
    // Depends on the title selector in
    // ./src/extractors/custom/www.broadwayworld.com/index.js.
    const parsed = await parsing;
    // Expected value is taken from the fixture article.
    assert.equal(parsed.title, 'American Theatre Wing Launches Andrew Lloyd Webber Training Scholarships');
  });

  it('returns the author', async () => {
    // Depends on the author selector in
    // ./src/extractors/custom/www.broadwayworld.com/index.js.
    const parsed = await parsing;
    // Expected value is taken from the fixture article.
    assert.equal(parsed.author, 'BWW News Desk');
  });

  it('returns the date_published', async () => {
    // Depends on the date_published selector in
    // ./src/extractors/custom/www.broadwayworld.com/index.js.
    const parsed = await parsing;
    // Expected value is taken from the fixture article.
    assert.equal(parsed.date_published, '2016-10-13T19:35:00.000Z');
  });

  it('returns the lead_image_url', async () => {
    // Depends on the lead_image_url selector in
    // ./src/extractors/custom/www.broadwayworld.com/index.js.
    const parsed = await parsing;
    // Expected value is taken from the fixture article.
    assert.equal(parsed.lead_image_url, 'https://images.bwwstatic.com/columnpic7/7B5FD766-A644-E386-19DE07017A3AD79C.jpg');
  });

  it('returns the content', async () => {
    // Depends on the content selector (and possibly the clean/transform
    // options) in ./src/extractors/custom/www.broadwayworld.com/index.js.
    const parsed = await parsing;
    // Missing content falls back to an empty document.
    const $ = cheerio.load(parsed.content || '');
    const firstNode = $('*').first();
    // Only a short excerpt of the extracted text is compared.
    const first13 = excerptContent(firstNode.text(), 13);
    assert.equal(first13, 'The American Theatre Wing announced today that their Andrew Lloyd Webber Initiative has');
  });
});
});

@ -56,7 +56,6 @@ export const BuzzfeedExtractor = {
date_published: {
selectors: [
'.buzz-datetime',
// enter author selectors
],
},

@ -9,147 +9,109 @@ import { excerptContent } from 'utils/text';
// Suite for the BuzzFeed extractor (already renamed from the CustomExtractor template).
describe('BuzzfeedExtractor', () => {
it('is selected properly', () => {
// To pass this test, rename your extractor in
// ./src/extractors/custom/www.buzzfeed.com/index.js
// (e.g., CustomExtractor => NYTimesExtractor)
// then add your new extractor to
// src/extractors/all.js
const url =
'https://www.buzzfeed.com/ikrd/people-are-calling-out-this-edited-picture-of-demi-lovato-fo';
const extractor = getExtractor(url);
assert.equal(extractor.domain, URL.parse(url).hostname);
// The fixture is handed to the parser once, in `beforeAll`; every field test
// below awaits that same promise instead of starting a parse of its own.
describe('initial test case', () => {
  let result;
  let url;

  beforeAll(() => {
    url =
      'https://www.buzzfeed.com/ikrd/people-are-calling-out-this-edited-picture-of-demi-lovato-fo';
    const html =
      fs.readFileSync('./fixtures/www.buzzfeed.com/1475531975121.html');
    // Deliberately not awaited: the pending promise is shared by the tests.
    result =
      Mercury.parse(url, html, { fallback: false });
  });

  // Purely synchronous check, so the callback is not declared `async`.
  it('is selected properly', () => {
    // To pass this test, rename your extractor in
    // ./src/extractors/custom/www.buzzfeed.com/index.js
    // (e.g., CustomExtractor => NYTimesExtractor)
    // then add your new extractor to
    // src/extractors/all.js
    const extractor = getExtractor(url);

    assert.equal(extractor.domain, URL.parse(url).hostname);
  });

  it('returns the title', async () => {
    // To pass this test, fill out the title selector
    // in ./src/extractors/custom/www.buzzfeed.com/index.js.
    const { title } = await result;

    // Update these values with the expected values from
    // the article.
    assert.equal(title, 'People Are Calling Out This Edited Picture Of Demi Lovato For Body-Shaming Her');
  });

  it('returns the author', async () => {
    // To pass this test, fill out the author selector
    // in ./src/extractors/custom/www.buzzfeed.com/index.js.
    const { author } = await result;

    // Update these values with the expected values from
    // the article.
    assert.equal(author, 'Ikran Dahir');
  });

  it('returns the lead_image_url', async () => {
    // To pass this test, fill out the lead_image_url selector
    // in ./src/extractors/custom/www.buzzfeed.com/index.js.
    const { lead_image_url } = await result;

    // Update these values with the expected values from
    // the article.
    assert.equal(lead_image_url, 'https://img.buzzfeed.com/buzzfeed-static/static/2016-10/3/12/social_promotion/buzzfeed-prod-fastlane01/facebook-social-promotion-17757-1475512210-1.jpg');
  });

  it('returns the content', async () => {
    // To pass this test, fill out the content selector
    // in ./src/extractors/custom/www.buzzfeed.com/index.js.
    // You may also want to make use of the clean and transform
    // options.
    const { content } = await result;

    // `content || ''` keeps cheerio from receiving null/undefined; a missing
    // body then fails the assertion below instead.
    const $ = cheerio.load(content || '');

    const first13 = excerptContent($('*').first().text(), 13);

    // Update these values with the expected values from
    // the article.
    assert.equal(first13, 'A few months ago, Vladimir Serbanescu, a 17-year-old artist from Romania, drew this');
  });
});
it('returns the title', async () => {
// To pass this test, fill out the title selector
// in ./src/extractors/custom/www.buzzfeed.com/index.js.
const html =
fs.readFileSync('./fixtures/www.buzzfeed.com/1475531975121.html');
const articleUrl =
'https://www.buzzfeed.com/ikrd/people-are-calling-out-this-edited-picture-of-demi-lovato-fo';
describe('splash image', () => {
let result;
let url;
beforeAll(() => {
url =
'https://www.buzzfeed.com/katiejmbaker/college-trump-supporters-the-new-counterculture?utm_term=.ckb72b58Y#.oxY8ZOWY3';
const html =
fs.readFileSync('./fixtures/www.buzzfeed.com/1480717502688.html');
result =
Mercury.parse(url, html, { fallback: false });
});
const { title } =
await Mercury.parse(articleUrl, html, { fallback: false });
it('returns big header images in the content', async () => {
const { content } = await result;
// Update these values with the expected values from
// the article.
assert.equal(title, 'People Are Calling Out This Edited Picture Of Demi Lovato For Body-Shaming Her');
});
it('returns the author', async () => {
// To pass this test, fill out the author selector
// in ./src/extractors/custom/www.buzzfeed.com/index.js.
const html =
fs.readFileSync('./fixtures/www.buzzfeed.com/1475531975121.html');
const articleUrl =
'https://www.buzzfeed.com/ikrd/people-are-calling-out-this-edited-picture-of-demi-lovato-fo';
const { author } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(author, 'Ikran Dahir');
});
// it('returns the date_published', async () => {
// // To pass this test, fill out the date_published selector
// // in ./src/extractors/custom/www.buzzfeed.com/index.js.
// const html =
// fs.readFileSync('./fixtures/www.buzzfeed.com/1475531975121.html');
// const articleUrl =
// 'https://www.buzzfeed.com/ikrd/people-are-calling-out-this-edited-picture-of-demi-lovato-fo';
//
// const { date_published } =
// await Mercury.parse(articleUrl, html, { fallback: false });
//
// // Update these values with the expected values from
// // the article.
// // assert.equal(date_published, 'hi');
// });
it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/www.buzzfeed.com/index.js.
const html =
fs.readFileSync('./fixtures/www.buzzfeed.com/1475531975121.html');
const articleUrl =
'https://www.buzzfeed.com/ikrd/people-are-calling-out-this-edited-picture-of-demi-lovato-fo';
const { lead_image_url } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(lead_image_url, 'https://img.buzzfeed.com/buzzfeed-static/static/2016-10/3/12/social_promotion/buzzfeed-prod-fastlane01/facebook-social-promotion-17757-1475512210-1.jpg');
});
it('returns the content', async () => {
// To pass this test, fill out the content selector
// in ./src/extractors/custom/www.buzzfeed.com/index.js.
// You may also want to make use of the clean and transform
// options.
const html =
fs.readFileSync('./fixtures/www.buzzfeed.com/1475531975121.html');
const url =
'https://www.buzzfeed.com/ikrd/people-are-calling-out-this-edited-picture-of-demi-lovato-fo';
const { content } =
await Mercury.parse(url, html, { fallback: false });
const $ = cheerio.load(content || '');
const $ = cheerio.load(content || '');
const first13 = excerptContent($('*').first().text(), 13);
// Update these values with the expected values from
// the article.
assert.equal(first13, 'A few months ago, Vladimir Serbanescu, a 17-year-old artist from Romania, drew this');
});
it('returns big header images in the content', async () => {
// To pass this test, fill out the content selector
// in ./src/extractors/custom/www.buzzfeed.com/index.js.
// You may also want to make use of the clean and transform
// options.
const html =
fs.readFileSync('./fixtures/www.buzzfeed.com/1480717502688.html');
const url =
'https://www.buzzfeed.com/katiejmbaker/college-trump-supporters-the-new-counterculture?utm_term=.ckb72b58Y#.oxY8ZOWY3';
const { content } =
await Mercury.parse(url, html, { fallback: false });
const $ = cheerio.load(content || '');
const imgSrc = $('img').first().attr('src');
// Update these values with the expected values from
// the article.
assert.equal(imgSrc, 'https://img.buzzfeed.com/buzzfeed-static/static/2016-11/21/10/enhanced/buzzfeed-prod-fastlane03/longform-original-25748-1479741827-5.jpg');
});
const imgSrc = $('img').first().attr('src');
it('transforms the splash image to a figure and caption', async () => {
// To pass this test, fill out the content selector
// in ./src/extractors/custom/www.buzzfeed.com/index.js.
// You may also want to make use of the clean and transform
// options.
const html =
fs.readFileSync('./fixtures/www.buzzfeed.com/1480717502688.html');
const url =
'https://www.buzzfeed.com/katiejmbaker/college-trump-supporters-the-new-counterculture?utm_term=.ckb72b58Y#.oxY8ZOWY3';
assert.equal(imgSrc, 'https://img.buzzfeed.com/buzzfeed-static/static/2016-11/21/10/enhanced/buzzfeed-prod-fastlane03/longform-original-25748-1479741827-5.jpg');
});
const { content } =
await Mercury.parse(url, html, { fallback: false });
it('transforms the splash image to a figure and caption', async () => {
const { content } = await result;
const $ = cheerio.load(content || '');
const $ = cheerio.load(content || '');
const imgSrc = $('figure img').first().attr('src');
const figcaption = $('figure figcaption').first().text();
const imgSrc = $('figure img').first().attr('src');
const figcaption = $('figure figcaption').first().text();
// Update these values with the expected values from
// the article.
assert.equal(imgSrc, 'https://img.buzzfeed.com/buzzfeed-static/static/2016-11/21/10/enhanced/buzzfeed-prod-fastlane03/longform-original-25748-1479741827-5.jpg');
assert.equal(figcaption, 'Adam Maida for BuzzFeed News');
// Update these values with the expected values from
// the article.
assert.equal(imgSrc, 'https://img.buzzfeed.com/buzzfeed-static/static/2016-11/21/10/enhanced/buzzfeed-prod-fastlane03/longform-original-25748-1479741827-5.jpg');
assert.equal(figcaption, 'Adam Maida for BuzzFeed News');
});
});
});

@ -8,100 +8,81 @@ import getExtractor from 'extractors/get-extractor';
import { excerptContent } from 'utils/text';
describe('WwwCnnComExtractor', () => {
it('is selected properly', () => {
// This test should be passing by default.
// It sanity checks that the correct parser
// is being selected for URLs from this domain
const url =
'http://www.cnn.com/2016/11/29/politics/donald-trump-transition-presidency/index.html';
const extractor = getExtractor(url);
assert.equal(extractor.domain, URL.parse(url).hostname);
});
it('returns the title', async () => {
// To pass this test, fill out the title selector
// in ./src/extractors/custom/www.cnn.com/index.js.
const html =
fs.readFileSync('./fixtures/www.cnn.com/1480458253239.html');
const articleUrl =
'http://www.cnn.com/2016/11/29/politics/donald-trump-transition-presidency/index.html';
const { title } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(title, 'Why Donald Trump won\'t change');
});
it('returns the author', async () => {
// To pass this test, fill out the author selector
// in ./src/extractors/custom/www.cnn.com/index.js.
const html =
fs.readFileSync('./fixtures/www.cnn.com/1480458253239.html');
const articleUrl =
'http://www.cnn.com/2016/11/29/politics/donald-trump-transition-presidency/index.html';
const { author } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(author, 'Stephen Collinson, CNN');
});
it('returns the date_published', async () => {
// To pass this test, fill out the date_published selector
// in ./src/extractors/custom/www.cnn.com/index.js.
const html =
fs.readFileSync('./fixtures/www.cnn.com/1480458253239.html');
const articleUrl =
'http://www.cnn.com/2016/11/29/politics/donald-trump-transition-presidency/index.html';
const { date_published } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(date_published, '2016-11-29T10:39:35.000Z');
});
it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/www.cnn.com/index.js.
const html =
fs.readFileSync('./fixtures/www.cnn.com/1480458253239.html');
const articleUrl =
'http://www.cnn.com/2016/11/29/politics/donald-trump-transition-presidency/index.html';
const { lead_image_url } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(lead_image_url, 'http://i2.cdn.cnn.com/cnnnext/dam/assets/161128072443-01-trump-1128-super-tease.jpg');
});
it('returns the content', async () => {
// To pass this test, fill out the content selector
// in ./src/extractors/custom/www.cnn.com/index.js.
// You may also want to make use of the clean and transform
// options.
const html =
fs.readFileSync('./fixtures/www.cnn.com/1480458253239.html');
const url =
'http://www.cnn.com/2016/11/29/politics/donald-trump-transition-presidency/index.html';
const { content } =
await Mercury.parse(url, html, { fallback: false });
const $ = cheerio.load(content || '');
const first13 = excerptContent($('*').first().text(), 13);
// Update these values with the expected values from
// the article.
assert.equal(first13, '(CNN)Winning the presidency didn\'t change Donald Trump -- and it\'s increasingly clear that');
assert.equal($('.media__video--thumbnail').length, 1);
// The fixture is handed to the parser once, in `beforeAll`; every field test
// below awaits that same promise instead of starting a parse of its own.
describe('initial test case', () => {
  let result;
  let url;

  beforeAll(() => {
    url =
      'http://www.cnn.com/2016/11/29/politics/donald-trump-transition-presidency/index.html';
    const html =
      fs.readFileSync('./fixtures/www.cnn.com/1480458253239.html');
    // Deliberately not awaited: the pending promise is shared by the tests.
    result =
      Mercury.parse(url, html, { fallback: false });
  });

  // Purely synchronous check, so the callback is not declared `async`.
  it('is selected properly', () => {
    // This test should be passing by default.
    // It sanity checks that the correct parser
    // is being selected for URLs from this domain
    const extractor = getExtractor(url);

    assert.equal(extractor.domain, URL.parse(url).hostname);
  });

  it('returns the title', async () => {
    // To pass this test, fill out the title selector
    // in ./src/extractors/custom/www.cnn.com/index.js.
    const { title } = await result;

    // Update these values with the expected values from
    // the article.
    assert.equal(title, 'Why Donald Trump won\'t change');
  });

  it('returns the author', async () => {
    // To pass this test, fill out the author selector
    // in ./src/extractors/custom/www.cnn.com/index.js.
    const { author } = await result;

    // Update these values with the expected values from
    // the article.
    assert.equal(author, 'Stephen Collinson, CNN');
  });

  it('returns the date_published', async () => {
    // To pass this test, fill out the date_published selector
    // in ./src/extractors/custom/www.cnn.com/index.js.
    const { date_published } = await result;

    // Update these values with the expected values from
    // the article.
    assert.equal(date_published, '2016-11-29T10:39:35.000Z');
  });

  it('returns the lead_image_url', async () => {
    // To pass this test, fill out the lead_image_url selector
    // in ./src/extractors/custom/www.cnn.com/index.js.
    const { lead_image_url } = await result;

    // Update these values with the expected values from
    // the article.
    assert.equal(lead_image_url, 'http://i2.cdn.cnn.com/cnnnext/dam/assets/161128072443-01-trump-1128-super-tease.jpg');
  });

  it('returns the content', async () => {
    // To pass this test, fill out the content selector
    // in ./src/extractors/custom/www.cnn.com/index.js.
    // You may also want to make use of the clean and transform
    // options.
    const { content } = await result;

    // `content || ''` keeps cheerio from receiving null/undefined; a missing
    // body then fails the assertions below instead.
    const $ = cheerio.load(content || '');

    const first13 = excerptContent($('*').first().text(), 13);

    // Update these values with the expected values from
    // the article.
    assert.equal(first13, '(CNN)Winning the presidency didn\'t change Donald Trump -- and it\'s increasingly clear that');
    // Exactly one `.media__video--thumbnail` element must be present in the
    // extracted content.
    assert.equal($('.media__video--thumbnail').length, 1);
  });
});
});

@ -8,115 +8,90 @@ import getExtractor from 'extractors/get-extractor';
import { excerptContent } from 'utils/text';
describe('WwwHuffingtonpostComExtractor', () => {
it('is selected properly', () => {
// This test should be passing by default.
// It sanity checks that the correct parser
// is being selected for URLs from this domain
const url =
'http://www.huffingtonpost.com/entry/donald-trump-obama_us_583c8f01e4b06539a789ddd4';
const extractor = getExtractor(url);
assert.equal(extractor.domain, URL.parse(url).hostname);
});
it('returns the title', async () => {
// To pass this test, fill out the title selector
// in ./src/extractors/custom/www.huffingtonpost.com/index.js.
const html =
fs.readFileSync('./fixtures/www.huffingtonpost.com/1480454076105.html');
const articleUrl =
'http://www.huffingtonpost.com/entry/donald-trump-obama_us_583c8f01e4b06539a789ddd4';
const { title } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(title, 'Trump Has Shown Receptiveness To Obama\'s Agenda. Does He Actually Mean It?');
});
it('returns the author', async () => {
// To pass this test, fill out the author selector
// in ./src/extractors/custom/www.huffingtonpost.com/index.js.
const html =
fs.readFileSync('./fixtures/www.huffingtonpost.com/1480454076105.html');
const articleUrl =
'http://www.huffingtonpost.com/entry/donald-trump-obama_us_583c8f01e4b06539a789ddd4';
const { author } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(author, 'Sam Stein');
});
it('returns the date_published', async () => {
// To pass this test, fill out the date_published selector
// in ./src/extractors/custom/www.huffingtonpost.com/index.js.
const html =
fs.readFileSync('./fixtures/www.huffingtonpost.com/1480454076105.html');
const articleUrl =
'http://www.huffingtonpost.com/entry/donald-trump-obama_us_583c8f01e4b06539a789ddd4';
const { date_published } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(date_published, '2016-11-28T21:23:00.000Z');
});
it('returns the dek', async () => {
// To pass this test, fill out the dek selector
// in ./src/extractors/custom/www.huffingtonpost.com/index.js.
const html =
fs.readFileSync('./fixtures/www.huffingtonpost.com/1480454076105.html');
const articleUrl =
'http://www.huffingtonpost.com/entry/donald-trump-obama_us_583c8f01e4b06539a789ddd4';
const { dek } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(dek, 'The $1 million question: Can you change the president-elect\'s worldview or is this all for show?');
});
it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/www.huffingtonpost.com/index.js.
const html =
fs.readFileSync('./fixtures/www.huffingtonpost.com/1480454076105.html');
const articleUrl =
'http://www.huffingtonpost.com/entry/donald-trump-obama_us_583c8f01e4b06539a789ddd4';
// Parses the fixture with the generic fallback disabled and reads only
// the lead_image_url field from the result.
const { lead_image_url } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(lead_image_url, 'http://img.huffingtonpost.com/asset/2000_1000/583c90681a00002500cca17a.jpeg');
});
it('returns the content', async () => {
// To pass this test, fill out the content selector
// in ./src/extractors/custom/www.huffingtonpost.com/index.js.
// You may also want to make use of the clean and transform
// options.
const html =
fs.readFileSync('./fixtures/www.huffingtonpost.com/1480454076105.html');
const url =
'http://www.huffingtonpost.com/entry/donald-trump-obama_us_583c8f01e4b06539a789ddd4';
const { content } =
await Mercury.parse(url, html, { fallback: false });
const $ = cheerio.load(content || '');
const first13 = excerptContent($('*').first().text(), 13);
// Update these values with the expected values from
// the article.
assert.equal(first13, 'ASSOCIATED PRESS Donald Trump has had several conversations with President Obama. How much');
// The fixture is handed to the parser once, in `beforeAll`; every field test
// below awaits that same promise instead of starting a parse of its own.
describe('initial test case', () => {
  let result;
  let url;

  beforeAll(() => {
    url =
      'http://www.huffingtonpost.com/entry/donald-trump-obama_us_583c8f01e4b06539a789ddd4';
    const html =
      fs.readFileSync('./fixtures/www.huffingtonpost.com/1480454076105.html');
    // Deliberately not awaited: the pending promise is shared by the tests.
    result =
      Mercury.parse(url, html, { fallback: false });
  });

  // Purely synchronous check, so the callback is not declared `async`.
  it('is selected properly', () => {
    // This test should be passing by default.
    // It sanity checks that the correct parser
    // is being selected for URLs from this domain
    const extractor = getExtractor(url);

    assert.equal(extractor.domain, URL.parse(url).hostname);
  });

  it('returns the title', async () => {
    // To pass this test, fill out the title selector
    // in ./src/extractors/custom/www.huffingtonpost.com/index.js.
    const { title } = await result;

    // Update these values with the expected values from
    // the article.
    assert.equal(title, 'Trump Has Shown Receptiveness To Obama\'s Agenda. Does He Actually Mean It?');
  });

  it('returns the author', async () => {
    // To pass this test, fill out the author selector
    // in ./src/extractors/custom/www.huffingtonpost.com/index.js.
    const { author } = await result;

    // Update these values with the expected values from
    // the article.
    assert.equal(author, 'Sam Stein');
  });

  it('returns the date_published', async () => {
    // To pass this test, fill out the date_published selector
    // in ./src/extractors/custom/www.huffingtonpost.com/index.js.
    const { date_published } = await result;

    // Update these values with the expected values from
    // the article.
    assert.equal(date_published, '2016-11-28T21:23:00.000Z');
  });

  it('returns the dek', async () => {
    // To pass this test, fill out the dek selector
    // in ./src/extractors/custom/www.huffingtonpost.com/index.js.
    const { dek } = await result;

    // Update these values with the expected values from
    // the article.
    assert.equal(dek, 'The $1 million question: Can you change the president-elect\'s worldview or is this all for show?');
  });

  it('returns the lead_image_url', async () => {
    // To pass this test, fill out the lead_image_url selector
    // in ./src/extractors/custom/www.huffingtonpost.com/index.js.
    const { lead_image_url } = await result;

    // Update these values with the expected values from
    // the article.
    assert.equal(lead_image_url, 'http://img.huffingtonpost.com/asset/2000_1000/583c90681a00002500cca17a.jpeg');
  });

  it('returns the content', async () => {
    // To pass this test, fill out the content selector
    // in ./src/extractors/custom/www.huffingtonpost.com/index.js.
    // You may also want to make use of the clean and transform
    // options.
    const { content } = await result;

    // `content || ''` keeps cheerio from receiving null/undefined; a missing
    // body then fails the assertion below instead.
    const $ = cheerio.load(content || '');

    const first13 = excerptContent($('*').first().text(), 13);

    // Update these values with the expected values from
    // the article.
    assert.equal(first13, 'ASSOCIATED PRESS Donald Trump has had several conversations with President Obama. How much');
  });
});
});

@ -9,85 +9,72 @@ import { excerptContent } from 'utils/text';
// Rename CustomExtractor
describe('LittleThingsExtractor', () => {
it('is selected properly', () => {
describe('initial test case', () => {
let result;
let url;
beforeAll(() => {
url =
'http://www.littlethings.com/diy-pineapple-lamp/';
const html =
fs.readFileSync('./fixtures/www.littlethings.com/1475605036506.html');
result =
Mercury.parse(url, html, { fallback: false });
});
it('is selected properly', async () => {
// To pass this test, rename your extractor in
// ./src/extractors/custom/www.littlethings.com/index.js
// (e.g., CustomExtractor => NYTimesExtractor)
// then add your new extractor to
// src/extractors/all.js
const url =
'http://www.littlethings.com/diy-pineapple-lamp/';
const extractor = getExtractor(url);
assert.equal(extractor.domain, URL.parse(url).hostname);
});
const extractor = getExtractor(url);
assert.equal(extractor.domain, URL.parse(url).hostname);
});
it('returns the title', async () => {
it('returns the title', async () => {
// To pass this test, fill out the title selector
// in ./src/extractors/custom/www.littlethings.com/index.js.
const html =
fs.readFileSync('./fixtures/www.littlethings.com/1475605036506.html');
const articleUrl =
'http://www.littlethings.com/diy-pineapple-lamp/';
const { title } =
await Mercury.parse(articleUrl, html, { fallback: false });
const { title } = await result;
// Update these values with the expected values from
// the article.
assert.equal(title, 'Snip The Stems Off Plastic Spoons To Make A Quirky Pineapple Lamp');
});
assert.equal(title, 'Snip The Stems Off Plastic Spoons To Make A Quirky Pineapple Lamp');
});
it('returns the author', async () => {
it('returns the author', async () => {
// To pass this test, fill out the author selector
// in ./src/extractors/custom/www.littlethings.com/index.js.
const html =
fs.readFileSync('./fixtures/www.littlethings.com/1475605036506.html');
const articleUrl =
'http://www.littlethings.com/diy-pineapple-lamp/';
const { author } =
await Mercury.parse(articleUrl, html, { fallback: false });
const { author } = await result;
// Update these values with the expected values from
// the article.
assert.equal(author, 'Laura Caseley');
});
assert.equal(author, 'Laura Caseley');
});
it('returns the lead_image_url', async () => {
it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/www.littlethings.com/index.js.
const html =
fs.readFileSync('./fixtures/www.littlethings.com/1475605036506.html');
const articleUrl =
'http://www.littlethings.com/diy-pineapple-lamp/';
const { lead_image_url } =
await Mercury.parse(articleUrl, html, { fallback: false });
const { lead_image_url } = await result;
// Update these values with the expected values from
// the article.
assert.equal(lead_image_url, 'http://cdn1.littlethings.com/app/uploads/2016/09/pineapple-b-thumb-1.jpg');
});
assert.equal(lead_image_url, 'http://cdn1.littlethings.com/app/uploads/2016/09/pineapple-b-thumb-1.jpg');
});
it('returns the content', async () => {
it('returns the content', async () => {
// To pass this test, fill out the content selector
// in ./src/extractors/custom/www.littlethings.com/index.js.
// You may also want to make use of the clean and transform
// options.
const html =
fs.readFileSync('./fixtures/www.littlethings.com/1475605036506.html');
const url =
'http://www.littlethings.com/diy-pineapple-lamp/';
const { content } =
await Mercury.parse(url, html, { fallback: false });
const { content } = await result;
const $ = cheerio.load(content || '');
const $ = cheerio.load(content || '');
const first13 = excerptContent($('*').first().text(), 13);
const first13 = excerptContent($('*').first().text(), 13);
// Update these values with the expected values from
// the article.
assert.equal(first13, 'Every room needs light, and so lamps are pretty much a necessity for');
assert.equal(first13, 'Every room needs light, and so lamps are pretty much a necessity for');
});
});
});

@ -9,101 +9,82 @@ import { excerptContent } from 'utils/text';
// Rename CustomExtractor
describe('MSNExtractor', () => {
it('is selected properly', () => {
// To pass this test, rename your extractor in
// ./src/extractors/custom/www.msn.com/index.js
// (e.g., CustomExtractor => NYTimesExtractor)
// then add your new extractor to
// src/extractors/all.js
const url =
'http://www.msn.com/en-us/health/wellness/this-is-your-brain-on-sad-movies-plus-5-films-to-cry-to/ar-BBwsPWG?li=BBnb2gg';
const extractor = getExtractor(url);
assert.equal(extractor.domain, URL.parse(url).hostname);
});
it('returns the title', async () => {
// To pass this test, fill out the title selector
// in ./src/extractors/custom/www.msn.com/index.js.
const html =
fs.readFileSync('./fixtures/www.msn.com/1475506925474.html');
const articleUrl =
'http://www.msn.com/en-us/health/wellness/this-is-your-brain-on-sad-movies-plus-5-films-to-cry-to/ar-BBwsPWG?li=BBnb2gg';
const { title } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(title, 'This Is Your Brain On Sad Movies; Plus 5 Films To Cry To');
});
it('returns the author', async () => {
// To pass this test, fill out the author selector
// in ./src/extractors/custom/www.msn.com/index.js.
const html =
fs.readFileSync('./fixtures/www.msn.com/1475506925474.html');
const articleUrl =
'http://www.msn.com/en-us/health/wellness/this-is-your-brain-on-sad-movies-plus-5-films-to-cry-to/ar-BBwsPWG?li=BBnb2gg';
const { author } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(author, 'Lizette Borreli');
});
it('returns the date_published', async () => {
// To pass this test, fill out the date_published selector
// in ./src/extractors/custom/www.msn.com/index.js.
const html =
fs.readFileSync('./fixtures/www.msn.com/1475506925474.html');
const articleUrl =
'http://www.msn.com/en-us/health/wellness/this-is-your-brain-on-sad-movies-plus-5-films-to-cry-to/ar-BBwsPWG?li=BBnb2gg';
const { date_published } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(date_published.split('T')[0], '2016-09-21');
});
it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/www.msn.com/index.js.
const html =
fs.readFileSync('./fixtures/www.msn.com/1475506925474.html');
const articleUrl =
'http://www.msn.com/en-us/health/wellness/this-is-your-brain-on-sad-movies-plus-5-films-to-cry-to/ar-BBwsPWG?li=BBnb2gg';
const { lead_image_url } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(lead_image_url, null);
});
it('returns the content', async () => {
// To pass this test, fill out the content selector
// in ./src/extractors/custom/www.msn.com/index.js.
// You may also want to make use of the clean and transform
// options.
const html =
fs.readFileSync('./fixtures/www.msn.com/1475506925474.html');
const url =
'http://www.msn.com/en-us/health/wellness/this-is-your-brain-on-sad-movies-plus-5-films-to-cry-to/ar-BBwsPWG?li=BBnb2gg';
const { content } =
await Mercury.parse(url, html, { fallback: false });
const $ = cheerio.load(content || '');
const first13 = excerptContent($('*').first().text(), 13);
// Update these values with the expected values from
// the article.
assert.equal(first13, 'The psychological reason why we love to watch sad movies is linked to');
// The fixture is handed to the parser once, in `beforeAll`; every field test
// below awaits that same promise instead of starting a parse of its own.
describe('initial test case', () => {
  let result;
  let url;

  beforeAll(() => {
    url =
      'http://www.msn.com/en-us/health/wellness/this-is-your-brain-on-sad-movies-plus-5-films-to-cry-to/ar-BBwsPWG?li=BBnb2gg';
    const html =
      fs.readFileSync('./fixtures/www.msn.com/1475506925474.html');
    // Deliberately not awaited: the pending promise is shared by the tests.
    result =
      Mercury.parse(url, html, { fallback: false });
  });

  // Purely synchronous check, so the callback is not declared `async`.
  it('is selected properly', () => {
    // To pass this test, rename your extractor in
    // ./src/extractors/custom/www.msn.com/index.js
    // (e.g., CustomExtractor => NYTimesExtractor)
    // then add your new extractor to
    // src/extractors/all.js
    const extractor = getExtractor(url);

    assert.equal(extractor.domain, URL.parse(url).hostname);
  });

  it('returns the title', async () => {
    // To pass this test, fill out the title selector
    // in ./src/extractors/custom/www.msn.com/index.js.
    const { title } = await result;

    // Update these values with the expected values from
    // the article.
    assert.equal(title, 'This Is Your Brain On Sad Movies; Plus 5 Films To Cry To');
  });

  it('returns the author', async () => {
    // To pass this test, fill out the author selector
    // in ./src/extractors/custom/www.msn.com/index.js.
    const { author } = await result;

    // Update these values with the expected values from
    // the article.
    assert.equal(author, 'Lizette Borreli');
  });

  it('returns the date_published', async () => {
    // To pass this test, fill out the date_published selector
    // in ./src/extractors/custom/www.msn.com/index.js.
    const { date_published } = await result;

    // Only the part before the 'T' separator is compared; the rest of the
    // timestamp string is ignored by this test.
    assert.equal(date_published.split('T')[0], '2016-09-21');
  });

  it('returns the lead_image_url', async () => {
    // To pass this test, fill out the lead_image_url selector
    // in ./src/extractors/custom/www.msn.com/index.js.
    const { lead_image_url } = await result;

    // This fixture is expected to yield no lead image.
    assert.equal(lead_image_url, null);
  });

  it('returns the content', async () => {
    // To pass this test, fill out the content selector
    // in ./src/extractors/custom/www.msn.com/index.js.
    // You may also want to make use of the clean and transform
    // options.
    const { content } = await result;

    // `content || ''` keeps cheerio from receiving null/undefined; a missing
    // body then fails the assertion below instead.
    const $ = cheerio.load(content || '');

    const first13 = excerptContent($('*').first().text(), 13);

    // Update these values with the expected values from
    // the article.
    assert.equal(first13, 'The psychological reason why we love to watch sad movies is linked to');
  });
});
});

@ -8,130 +8,110 @@ import getExtractor from 'extractors/get-extractor';
import { excerptContent } from 'utils/text';
describe('NewYorkerExtractor', () => {
it('is selected properly', () => {
// To pass this test, rename your extractor in
// ./src/extractors/custom/www.newyorker.com/index.js
// (e.g., CustomExtractor => NYTimesExtractor)
// then add your new extractor to
// src/extractors/all.js
const url =
'http://www.newyorker.com/tech/elements/hacking-cryptography-and-the-countdown-to-quantum-computing';
const extractor = getExtractor(url);
assert.equal(extractor.domain, URL.parse(url).hostname);
// Exercises the www.newyorker.com extractor
// (./src/extractors/custom/www.newyorker.com/index.js) against one saved
// article. The fixture is handed to Mercury.parse a single time in
// beforeAll; every field test awaits that same stored promise.
describe('initial test case', () => {
  let url;
  let result;

  beforeAll(() => {
    url =
      'http://www.newyorker.com/tech/elements/hacking-cryptography-and-the-countdown-to-quantum-computing';
    const fixture = fs.readFileSync(
      './fixtures/www.newyorker.com/1475248565793.html'
    );
    // Stored as a promise; each test below awaits it.
    result = Mercury.parse(url, fixture, { fallback: false });
  });

  // The extractor returned for the URL must report the URL's hostname as its
  // domain. If this fails, check the extractor has been renamed from the
  // scaffold name and added to src/extractors/all.js.
  it('is selected properly', async () => {
    const { domain } = getExtractor(url);
    const { hostname } = URL.parse(url);

    assert.equal(domain, hostname);
  });

  // Driven by the title selector.
  it('returns the title', async () => {
    const parsed = await result;

    assert.equal(parsed.title, 'Hacking, Cryptography, and the Countdown to Quantum Computing');
  });

  // Driven by the author selector.
  it('returns the author', async () => {
    const parsed = await result;

    assert.equal(parsed.author, 'Alex Hutchinson');
  });

  // Driven by the date_published selector.
  it('returns the date_published', async () => {
    const parsed = await result;

    assert.equal(parsed.date_published, '2016-09-26T18:04:22.000Z');
  });

  // Driven by the lead_image_url selector.
  it('returns the lead_image_url', async () => {
    const parsed = await result;

    assert.equal(parsed.lead_image_url, 'http://www.newyorker.com/wp-content/uploads/2016/09/Hutchinson-Quantum-Computing-1200x630-1474903563.jpg');
  });

  // Driven by the content selector (and any clean/transform options).
  // Compares a 13-unit excerpt of the first element's text.
  it('returns the content', async () => {
    const parsed = await result;
    // Fall back to an empty document when no content was extracted.
    const $ = cheerio.load(parsed.content || '');
    const leadText = $('*').first().text();

    assert.equal(
      excerptContent(leadText, 13),
      'In a laboratory in Shanghai, researchers work on developing a quantum computer—a new'
    );
  });
});
it('returns the title', async () => {
// To pass this test, fill out the title selector
// in ./src/extractors/custom/www.newyorker.com/index.js.
const html =
fs.readFileSync('./fixtures/www.newyorker.com/1475248565793.html');
const articleUrl =
'http://www.newyorker.com/tech/elements/hacking-cryptography-and-the-countdown-to-quantum-computing';
const { title } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(title, 'Hacking, Cryptography, and the Countdown to Quantum Computing');
});
it('returns the author', async () => {
// To pass this test, fill out the author selector
// in ./src/extractors/custom/www.newyorker.com/index.js.
const html =
fs.readFileSync('./fixtures/www.newyorker.com/1475248565793.html');
const articleUrl =
'http://www.newyorker.com/tech/elements/hacking-cryptography-and-the-countdown-to-quantum-computing';
const { author } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(author, 'Alex Hutchinson');
});
it('returns the date_published', async () => {
// To pass this test, fill out the date_published selector
// in ./src/extractors/custom/www.newyorker.com/index.js.
const html =
fs.readFileSync('./fixtures/www.newyorker.com/1475248565793.html');
const articleUrl =
'http://www.newyorker.com/tech/elements/hacking-cryptography-and-the-countdown-to-quantum-computing';
const { date_published } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(date_published, '2016-09-26T18:04:22.000Z');
});
it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/www.newyorker.com/index.js.
const html =
fs.readFileSync('./fixtures/www.newyorker.com/1475248565793.html');
const articleUrl =
'http://www.newyorker.com/tech/elements/hacking-cryptography-and-the-countdown-to-quantum-computing';
const { lead_image_url } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(lead_image_url, 'http://www.newyorker.com/wp-content/uploads/2016/09/Hutchinson-Quantum-Computing-1200x630-1474903563.jpg');
});
it('returns the content', async () => {
// To pass this test, fill out the content selector
// in ./src/extractors/custom/www.newyorker.com/index.js.
// You may also want to make use of the clean and transform
// options.
const html =
fs.readFileSync('./fixtures/www.newyorker.com/1475248565793.html');
const url =
'http://www.newyorker.com/tech/elements/hacking-cryptography-and-the-countdown-to-quantum-computing';
const { content } =
await Mercury.parse(url, html, { fallback: false });
const $ = cheerio.load(content || '');
const first13 = excerptContent($('*').first().text(), 13);
// Update these values with the expected values from
// the article.
assert.equal(first13, 'In a laboratory in Shanghai, researchers work on developing a quantum computer—a new');
});
// Parses the saved New Yorker magazine article and checks the extracted dek.
it('returns the dek when present', async () => {
  const fixture = fs.readFileSync(
    './fixtures/www.newyorker.com/1480713300334.html'
  );
  const articleUrl =
    'http://www.newyorker.com/magazine/2016/12/05/lessons-from-my-mother';
  const parsed = await Mercury.parse(articleUrl, fixture, { fallback: false });

  assert.equal(
    parsed.dek,
    'I had a sense that she was a good teacher, but I had no idea that she was such an influential one, and in the very area I had chosen.'
  );
});
it('returns the date for magazine content', async () => {
const html =
fs.readFileSync('./fixtures/www.newyorker.com/1480713300334.html');
const url =
'http://www.newyorker.com/magazine/2016/12/05/lessons-from-my-mother';
const { date_published } =
await Mercury.parse(url, html, { fallback: false });
assert.equal(date_published, '2016-11-28T05:00:00.000Z');
// Checks fields specific to New Yorker magazine articles. The fixture is
// handed to Mercury.parse once in beforeAll and both tests await the same
// stored promise.
describe('magazine content', () => {
  let url;
  let result;

  beforeAll(() => {
    url =
      'http://www.newyorker.com/magazine/2016/12/05/lessons-from-my-mother';
    const fixture = fs.readFileSync(
      './fixtures/www.newyorker.com/1480713300334.html'
    );
    // Stored as a promise; each test below awaits it.
    result = Mercury.parse(url, fixture, { fallback: false });
  });

  it('returns the dek when present', async () => {
    const parsed = await result;

    assert.equal(
      parsed.dek,
      'I had a sense that she was a good teacher, but I had no idea that she was such an influential one, and in the very area I had chosen.'
    );
  });

  it('returns the date for magazine content', async () => {
    const parsed = await result;

    assert.equal(parsed.date_published, '2016-11-28T05:00:00.000Z');
  });
});
});

@ -8,102 +8,83 @@ import { excerptContent } from 'utils/text';
import Mercury from 'mercury';
describe('NYTimesExtractor', () => {
// Checks that getExtractor returns an extractor whose `domain` equals the
// hostname of a www.nytimes.com URL. If it fails, make sure the extractor in
// ./src/extractors/custom/www.nytimes.com/index.js has been renamed from the
// scaffold name (e.g., CustomExtractor => NYTimesExtractor) and added to
// src/extractors/all.js.
it('is selected properly', () => {
  const url =
    'http://www.nytimes.com/interactive/2016/09/15/arts/design/national-museum-of-african-american-history-and-culture.html';
  const { domain } = getExtractor(url);
  const { hostname } = URL.parse(url);

  assert.equal(domain, hostname);
});
it('returns the title', async () => {
// To pass this test, fill out the title selector
// in ./src/extractors/custom/www.nytimes.com/index.js.
const html =
fs.readFileSync('./fixtures/www.nytimes.com/1474318141888.html');
const articleUrl =
'http://www.nytimes.com/2016/09/20/nyregion/nyc-nj-explosions-ahmad-khan-rahami.html';
const { title } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(title, 'Ahmad Khan Rahami Is Arrested in Manhattan and New Jersey Bombings');
});
it('returns the author', async () => {
// To pass this test, fill out the author selector
// in ./src/extractors/custom/www.nytimes.com/index.js.
const html =
fs.readFileSync('./fixtures/www.nytimes.com/1474318141888.html');
const articleUrl =
'http://www.nytimes.com/2016/09/20/nyregion/nyc-nj-explosions-ahmad-khan-rahami.html';
const { author } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(author, 'Marc Santora, William K. Rashbaum, Al Baker and Adam Goldman');
});
it('returns the date_published', async () => {
// To pass this test, fill out the date_published selector
// in ./src/extractors/custom/www.nytimes.com/index.js.
const html =
fs.readFileSync('./fixtures/www.nytimes.com/1474318141888.html');
const articleUrl =
'http://www.nytimes.com/2016/09/20/nyregion/nyc-nj-explosions-ahmad-khan-rahami.html';
const { date_published } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(date_published, '2016-09-19T11:46:01.000Z');
});
it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/www.nytimes.com/index.js.
const html =
fs.readFileSync('./fixtures/www.nytimes.com/1474318141888.html');
const articleUrl =
'http://www.nytimes.com/2016/09/20/nyregion/nyc-nj-explosions-ahmad-khan-rahami.html';
const { lead_image_url } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(lead_image_url, 'https://static01.nyt.com/images/2016/09/20/nyregion/20MANHUNT1/20MANHUNT1-facebookJumbo.jpg');
});
it('returns the content', async () => {
// To pass this test, fill out the content selector
// in ./src/extractors/custom/www.nytimes.com/index.js.
// You may also want to make use of the clean and transform
// options.
const html =
fs.readFileSync('./fixtures/www.nytimes.com/1474318141888.html');
const url =
'http://www.nytimes.com/2016/09/20/nyregion/nyc-nj-explosions-ahmad-khan-rahami.html';
const { content } =
await Mercury.parse(url, html, { fallback: false });
const $ = cheerio.load(content || '');
const first13 = excerptContent($('*').first().text(), 13);
// Update these values with the expected values from
// the article.
assert.equal(first13, 'The man believed to be responsible for the explosion in Manhattan on Saturday');
// Exercises the www.nytimes.com extractor
// (./src/extractors/custom/www.nytimes.com/index.js) against one saved
// article. The fixture is handed to Mercury.parse a single time in
// beforeAll; every field test awaits that same stored promise.
describe('initial test case', () => {
  let result;
  let url;

  beforeAll(() => {
    // Use the URL of the article actually stored in the fixture (the one
    // asserted on below), not an unrelated nytimes.com page, so the parser
    // sees a URL that matches the HTML it is given. The hostname is the
    // same, so extractor selection is unaffected.
    url =
      'http://www.nytimes.com/2016/09/20/nyregion/nyc-nj-explosions-ahmad-khan-rahami.html';
    const html =
      fs.readFileSync('./fixtures/www.nytimes.com/1474318141888.html');
    // Stored as a promise; each test below awaits it.
    result =
      Mercury.parse(url, html, { fallback: false });
  });

  // The extractor returned for the URL must report the URL's hostname as its
  // domain. If this fails, check the extractor has been renamed from the
  // scaffold name (e.g., CustomExtractor => NYTimesExtractor) and added to
  // src/extractors/all.js.
  it('is selected properly', async () => {
    const extractor = getExtractor(url);

    assert.equal(extractor.domain, URL.parse(url).hostname);
  });

  // Driven by the title selector.
  it('returns the title', async () => {
    const { title } = await result;

    assert.equal(title, 'Ahmad Khan Rahami Is Arrested in Manhattan and New Jersey Bombings');
  });

  // Driven by the author selector.
  it('returns the author', async () => {
    const { author } = await result;

    assert.equal(author, 'Marc Santora, William K. Rashbaum, Al Baker and Adam Goldman');
  });

  // Driven by the date_published selector.
  it('returns the date_published', async () => {
    const { date_published } = await result;

    assert.equal(date_published, '2016-09-19T11:46:01.000Z');
  });

  // Driven by the lead_image_url selector.
  it('returns the lead_image_url', async () => {
    const { lead_image_url } = await result;

    assert.equal(lead_image_url, 'https://static01.nyt.com/images/2016/09/20/nyregion/20MANHUNT1/20MANHUNT1-facebookJumbo.jpg');
  });

  // Driven by the content selector (and any clean/transform options).
  // Compares a 13-unit excerpt of the first element's text.
  it('returns the content', async () => {
    const { content } = await result;
    // Fall back to an empty document when no content was extracted.
    const $ = cheerio.load(content || '');
    const first13 = excerptContent($('*').first().text(), 13);

    assert.equal(first13, 'The man believed to be responsible for the explosion in Manhattan on Saturday');
  });
});
it('works with a feature story', async () => {
@ -113,9 +94,9 @@ describe('NYTimesExtractor', () => {
const { content, title, author } = await Mercury.parse(uri, html);
const $ = cheerio.load(content);
const text = $('*').first()
.text()
.trim()
.slice(0, 20);
.text()
.trim()
.slice(0, 20);
assert.equal(title, 'I, Too, Sing America');
assert.equal(author, 'The New York Times');

@ -9,101 +9,82 @@ import { excerptContent } from 'utils/text';
// Tests for the custom extractor registered for www.politico.com.
describe('PoliticoExtractor', () => {
// Checks that getExtractor returns an extractor whose `domain` equals the
// hostname of a www.politico.com URL. If it fails, make sure the extractor in
// ./src/extractors/custom/www.politico.com/index.js has been renamed from the
// scaffold name (e.g., CustomExtractor => NYTimesExtractor) and added to
// src/extractors/all.js.
it('is selected properly', () => {
  const url =
    'http://www.politico.com/story/2016/10/who-will-win-the-vp-debate-229079?lo=ut_a1';
  const { domain } = getExtractor(url);
  const { hostname } = URL.parse(url);

  assert.equal(domain, hostname);
});
it('returns the title', async () => {
// To pass this test, fill out the title selector
// in ./src/extractors/custom/www.politico.com/index.js.
const html =
fs.readFileSync('./fixtures/www.politico.com/1475617690069.html');
const articleUrl =
'http://www.politico.com/story/2016/10/who-will-win-the-vp-debate-229079?lo=ut_a1';
const { title } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(title, 'Insiders: Trump will sink Pence in VP debate');
});
it('returns the author', async () => {
// To pass this test, fill out the author selector
// in ./src/extractors/custom/www.politico.com/index.js.
const html =
fs.readFileSync('./fixtures/www.politico.com/1475617690069.html');
const articleUrl =
'http://www.politico.com/story/2016/10/who-will-win-the-vp-debate-229079?lo=ut_a1';
const { author } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(author, 'Steven Shepard');
});
it('returns the date_published', async () => {
// To pass this test, fill out the date_published selector
// in ./src/extractors/custom/www.politico.com/index.js.
const html =
fs.readFileSync('./fixtures/www.politico.com/1475617690069.html');
const articleUrl =
'http://www.politico.com/story/2016/10/who-will-win-the-vp-debate-229079?lo=ut_a1';
const { date_published } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(date_published, '2016-10-04T09:07:00.000Z');
});
it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/www.politico.com/index.js.
const html =
fs.readFileSync('./fixtures/www.politico.com/1475617690069.html');
const articleUrl =
'http://www.politico.com/story/2016/10/who-will-win-the-vp-debate-229079?lo=ut_a1';
const { lead_image_url } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(lead_image_url, 'http://static.politico.com/0f/e7/5ee9a89044d1a01f74140bcd5b9e/caucus-vp-preview.jpg');
});
it('returns the content', async () => {
// To pass this test, fill out the content selector
// in ./src/extractors/custom/www.politico.com/index.js.
// You may also want to make use of the clean and transform
// options.
const html =
fs.readFileSync('./fixtures/www.politico.com/1475617690069.html');
const url =
'http://www.politico.com/story/2016/10/who-will-win-the-vp-debate-229079?lo=ut_a1';
const { content } =
await Mercury.parse(url, html, { fallback: false });
const $ = cheerio.load(content || '');
const first13 = excerptContent($('*').first().text(), 13);
// Update these values with the expected values from
// the article.
assert.equal(first13, 'Tim Kaine isnt Mike Pences only opponent Tuesday night in the only debate');
// Exercises the www.politico.com extractor
// (./src/extractors/custom/www.politico.com/index.js) against one saved
// article. The fixture is handed to Mercury.parse a single time in
// beforeAll; every field test awaits that same stored promise.
describe('initial test case', () => {
  let url;
  let result;

  beforeAll(() => {
    url =
      'http://www.politico.com/story/2016/10/who-will-win-the-vp-debate-229079?lo=ut_a1';
    const fixture = fs.readFileSync(
      './fixtures/www.politico.com/1475617690069.html'
    );
    // Stored as a promise; each test below awaits it.
    result = Mercury.parse(url, fixture, { fallback: false });
  });

  // The extractor returned for the URL must report the URL's hostname as its
  // domain. If this fails, check the extractor has been renamed from the
  // scaffold name and added to src/extractors/all.js.
  it('is selected properly', async () => {
    const { domain } = getExtractor(url);
    const { hostname } = URL.parse(url);

    assert.equal(domain, hostname);
  });

  // Driven by the title selector.
  it('returns the title', async () => {
    const parsed = await result;

    assert.equal(parsed.title, 'Insiders: Trump will sink Pence in VP debate');
  });

  // Driven by the author selector.
  it('returns the author', async () => {
    const parsed = await result;

    assert.equal(parsed.author, 'Steven Shepard');
  });

  // Driven by the date_published selector.
  it('returns the date_published', async () => {
    const parsed = await result;

    assert.equal(parsed.date_published, '2016-10-04T09:07:00.000Z');
  });

  // Driven by the lead_image_url selector.
  it('returns the lead_image_url', async () => {
    const parsed = await result;

    assert.equal(parsed.lead_image_url, 'http://static.politico.com/0f/e7/5ee9a89044d1a01f74140bcd5b9e/caucus-vp-preview.jpg');
  });

  // Driven by the content selector (and any clean/transform options).
  // Compares a 13-unit excerpt of the first element's text.
  it('returns the content', async () => {
    const parsed = await result;
    // Fall back to an empty document when no content was extracted.
    const $ = cheerio.load(parsed.content || '');
    const leadText = $('*').first().text();

    assert.equal(
      excerptContent(leadText, 13),
      'Tim Kaine isnt Mike Pences only opponent Tuesday night in the only debate'
    );
  });
});
});

@ -7,34 +7,43 @@ import Mercury from 'mercury';
import getExtractor from 'extractors/get-extractor';
// Tests for the custom extractor registered for www.theatlantic.com.
describe('CustomExtractor', () => {
// Checks that getExtractor returns an extractor whose `domain` equals the
// hostname of a www.theatlantic.com URL. If it fails, make sure the extractor
// in ./src/extractors/custom/www.theatlantic.com/index.js has been renamed
// and added to src/extractors/all.js.
it('is selected properly', () => {
  const url = 'http://www.theatlantic.com/technology/archive/2016/09/why-new-yorkers-got-a-push-alert-about-a-manhunt/500591/';
  const { domain } = getExtractor(url);
  const { hostname } = URL.parse(url);

  assert.equal(domain, hostname);
});
describe('AtlanticExtractor', () => {
describe('initial test case', () => {
let result;
let url;
beforeAll(() => {
url =
'http://www.theatlantic.com/technology/archive/2016/09/why-new-yorkers-got-a-push-alert-about-a-manhunt/500591/';
const html =
fs.readFileSync('./fixtures/www.theatlantic.com/1474321707642.html');
result =
Mercury.parse(url, html, { fallback: false });
});
it('works with a starter story', async () => {
// To pass this test, begin filling out your
// selectors in ./src/extractors/custom/www.theatlantic.com/index.js. This test is just
// a stub; you can add more fields to test as much of
// your parser as possible.
const html = fs.readFileSync('./fixtures/www.theatlantic.com/1474321707642.html');
const uri = 'http://www.theatlantic.com/technology/archive/2016/09/why-new-yorkers-got-a-push-alert-about-a-manhunt/500591/';
// Checks that getExtractor returns an extractor whose `domain` equals the
// hostname of the shared test URL. If it fails, make sure the extractor in
// ./src/extractors/custom/www.theatlantic.com/index.js has been renamed and
// added to src/extractors/all.js.
it('is selected properly', async () => {
  const { domain } = getExtractor(url);
  const { hostname } = URL.parse(url);

  assert.equal(domain, hostname);
});
const { content, title, author } = await Mercury.parse(uri, html);
const $ = cheerio.load(content);
const text = $('*').first()
.text()
.trim()
.slice(0, 20);
it('works with a starter story', async () => {
// To pass this test, begin filling out your
// selectors in ./src/extractors/custom/www.theatlantic.com/index.js. This test is just
// a stub; you can add more fields to test as much of
// your parser as possible.
const { content, title, author } = await result;
const $ = cheerio.load(content);
const text = $('*').first()
.text()
.trim()
.slice(0, 20);
assert.equal(title, 'Why New Yorkers Received a Push Alert About a Manhunt');
assert.equal(author, 'Kaveh Waddell');
assert.equal(text, 'New York police offi');
assert.equal(title, 'Why New Yorkers Received a Push Alert About a Manhunt');
assert.equal(author, 'Kaveh Waddell');
assert.equal(text, 'New York police offi');
});
});
});

@ -8,119 +8,93 @@ import getExtractor from 'extractors/get-extractor';
import { excerptContent } from 'utils/text';
describe('WwwThevergeComExtractor', () => {
// Sanity check that should pass by default: the extractor returned for a
// www.theverge.com URL must report that URL's hostname as its domain.
it('is selected properly', () => {
  const url =
    'http://www.theverge.com/2016/11/29/13774648/fcc-att-zero-rating-directv-net-neutrality-vs-tmobile';
  const { domain } = getExtractor(url);
  const { hostname } = URL.parse(url);

  assert.equal(domain, hostname);
});
it('returns the title', async () => {
// To pass this test, fill out the title selector
// in ./src/extractors/custom/www.theverge.com/index.js.
const html =
fs.readFileSync('./fixtures/www.theverge.com/1480520999617.html');
const articleUrl =
'http://www.theverge.com/2016/11/29/13774648/fcc-att-zero-rating-directv-net-neutrality-vs-tmobile';
const { title } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(title, 'AT&T just declared war on an open internet (and us)');
});
it('returns the author', async () => {
// To pass this test, fill out the author selector
// in ./src/extractors/custom/www.theverge.com/index.js.
const html =
fs.readFileSync('./fixtures/www.theverge.com/1480520999617.html');
const articleUrl =
'http://www.theverge.com/2016/11/29/13774648/fcc-att-zero-rating-directv-net-neutrality-vs-tmobile';
const { author } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(author, 'T.C. Sottek');
});
it('returns the date_published', async () => {
// To pass this test, fill out the date_published selector
// in ./src/extractors/custom/www.theverge.com/index.js.
const html =
fs.readFileSync('./fixtures/www.theverge.com/1480520999617.html');
const articleUrl =
'http://www.theverge.com/2016/11/29/13774648/fcc-att-zero-rating-directv-net-neutrality-vs-tmobile';
const { date_published } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(date_published, '2016-11-29T15:00:19.000Z');
});
it('returns the dek', async () => {
// To pass this test, fill out the dek selector
// in ./src/extractors/custom/www.theverge.com/index.js.
const html =
fs.readFileSync('./fixtures/www.theverge.com/1480520999617.html');
const articleUrl =
'http://www.theverge.com/2016/11/29/13774648/fcc-att-zero-rating-directv-net-neutrality-vs-tmobile';
const { dek } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(dek, 'Mobilizing Your World sounds like a threat now');
});
it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/www.theverge.com/index.js.
const html =
fs.readFileSync('./fixtures/www.theverge.com/1480520999617.html');
const articleUrl =
'http://www.theverge.com/2016/11/29/13774648/fcc-att-zero-rating-directv-net-neutrality-vs-tmobile';
const { lead_image_url } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(lead_image_url, 'https://cdn0.vox-cdn.com/thumbor/v7kU2cISjo-wm6XceGk_kBuMBlA=/0x16:1024x592/1600x900/cdn0.vox-cdn.com/uploads/chorus_image/image/52042639/vrg_tc_attarmy_1024.1480431618.jpeg');
});
it('returns the content', async () => {
// To pass this test, fill out the content selector
// in ./src/extractors/custom/www.theverge.com/index.js.
// You may also want to make use of the clean and transform
// options.
const html =
fs.readFileSync('./fixtures/www.theverge.com/1480520999617.html');
const url =
'http://www.theverge.com/2016/11/29/13774648/fcc-att-zero-rating-directv-net-neutrality-vs-tmobile';
const { content } =
await Mercury.parse(url, html, { fallback: false });
const $ = cheerio.load(content || '');
const first13 = excerptContent($('*').first().text(), 13);
// Update these values with the expected values from
// the article.
assert.equal(first13, 'Last year we won the open internet back, but the new regulations had');
assert.equal($('.e-image--hero').length, 1);
// Exercises the www.theverge.com extractor
// (./src/extractors/custom/www.theverge.com/index.js) against one saved
// article. The fixture is handed to Mercury.parse a single time in
// beforeAll; every field test awaits that same stored promise.
describe('initial test case', () => {
  let url;
  let result;

  beforeAll(() => {
    url =
      'http://www.theverge.com/2016/11/29/13774648/fcc-att-zero-rating-directv-net-neutrality-vs-tmobile';
    const fixture = fs.readFileSync(
      './fixtures/www.theverge.com/1480520999617.html'
    );
    // Stored as a promise; each test below awaits it.
    result = Mercury.parse(url, fixture, { fallback: false });
  });

  // Sanity check that should pass by default: the extractor returned for the
  // URL must report the URL's hostname as its domain.
  it('is selected properly', async () => {
    const { domain } = getExtractor(url);
    const { hostname } = URL.parse(url);

    assert.equal(domain, hostname);
  });

  // Driven by the title selector.
  it('returns the title', async () => {
    const parsed = await result;

    assert.equal(parsed.title, 'AT&T just declared war on an open internet (and us)');
  });

  // Driven by the author selector.
  it('returns the author', async () => {
    const parsed = await result;

    assert.equal(parsed.author, 'T.C. Sottek');
  });

  // Driven by the date_published selector.
  it('returns the date_published', async () => {
    const parsed = await result;

    assert.equal(parsed.date_published, '2016-11-29T15:00:19.000Z');
  });

  // Driven by the dek selector.
  it('returns the dek', async () => {
    const parsed = await result;

    assert.equal(parsed.dek, 'Mobilizing Your World sounds like a threat now');
  });

  // Driven by the lead_image_url selector.
  it('returns the lead_image_url', async () => {
    const parsed = await result;

    assert.equal(parsed.lead_image_url, 'https://cdn0.vox-cdn.com/thumbor/v7kU2cISjo-wm6XceGk_kBuMBlA=/0x16:1024x592/1600x900/cdn0.vox-cdn.com/uploads/chorus_image/image/52042639/vrg_tc_attarmy_1024.1480431618.jpeg');
  });

  // Driven by the content selector (and any clean/transform options).
  // Compares a 13-unit excerpt of the first element's text and also expects
  // exactly one `.e-image--hero` element in the extracted content.
  it('returns the content', async () => {
    const parsed = await result;
    // Fall back to an empty document when no content was extracted.
    const $ = cheerio.load(parsed.content || '');
    const leadText = $('*').first().text();

    assert.equal(
      excerptContent(leadText, 13),
      'Last year we won the open internet back, but the new regulations had'
    );
    assert.equal($('.e-image--hero').length, 1);
  });
});
it('returns the content from a feature', async () => {
// To pass this test, fill out the content selector
// in ./src/extractors/custom/www.theverge.com/index.js.

@ -8,118 +8,99 @@ import getExtractor from 'extractors/get-extractor';
import { excerptContent } from 'utils/text';
describe('WwwTmzComExtractor', () => {
// Sanity check that should pass by default: the extractor returned for a
// www.tmz.com URL must report that URL's hostname as its domain.
it('is selected properly', () => {
  const url =
    'http://www.tmz.com/2016/11/28/prince-wife-estate-will/';
  const { domain } = getExtractor(url);
  const { hostname } = URL.parse(url);

  assert.equal(domain, hostname);
});
it('returns the title', async () => {
// To pass this test, fill out the title selector
// in ./src/extractors/custom/www.tmz.com/index.js.
const html =
fs.readFileSync('./fixtures/www.tmz.com/1480368537455.html');
const articleUrl =
'http://www.tmz.com/2016/11/28/prince-wife-estate-will/';
const { title } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(title, 'Prince -- Woman Warns Estate ... Step Aside, I\'m His Wife!');
});
it('returns the author', async () => {
  // The author selector under test lives in
  // ./src/extractors/custom/www.tmz.com/index.js.
  const pageUrl = 'http://www.tmz.com/2016/11/28/prince-wife-estate-will/';
  const fixture = fs.readFileSync('./fixtures/www.tmz.com/1480368537455.html');

  const parsed = await Mercury.parse(pageUrl, fixture, { fallback: false });

  // Expected value is taken from the article.
  assert.equal(parsed.author, 'TMZ STAFF');
});
it('returns the date_published', async () => {
  // The date_published selector under test lives in
  // ./src/extractors/custom/www.tmz.com/index.js.
  const pageUrl = 'http://www.tmz.com/2016/11/28/prince-wife-estate-will/';
  const fixture = fs.readFileSync('./fixtures/www.tmz.com/1480368537455.html');

  const parsed = await Mercury.parse(pageUrl, fixture, { fallback: false });

  // NOTE: the expected value below is knowingly wrong. The error comes
  // from TMZ's very bad markup: the parser currently gets close, but not
  // the correct timezone. Better markup on TMZ's side could fix this.
  assert.equal(parsed.date_published, '2016-11-28T11:00:00.000Z');
});
// it('returns the dek', async () => {
// // To pass this test, fill out the dek selector
// // in ./src/extractors/custom/www.tmz.com/index.js.
// const html =
// fs.readFileSync('./fixtures/www.tmz.com/1480368537455.html');
// const articleUrl =
// 'http://www.tmz.com/2016/11/28/prince-wife-estate-will/';
//
// const { dek } =
// await Mercury.parse(articleUrl, html, { fallback: false });
//
// // Update these values with the expected values from
// // the article.
// assert.equal(dek, '');
// });
it('returns the lead_image_url', async () => {
  // The lead_image_url selector under test lives in
  // ./src/extractors/custom/www.tmz.com/index.js.
  const pageUrl = 'http://www.tmz.com/2016/11/28/prince-wife-estate-will/';
  const fixture = fs.readFileSync('./fixtures/www.tmz.com/1480368537455.html');

  const parsed = await Mercury.parse(pageUrl, fixture, { fallback: false });

  // Expected value is taken from the article.
  assert.equal(parsed.lead_image_url, 'http://ll-media.tmz.com/2016/11/28/1128-prince-getty-03-1200x630.jpg');
});
it('returns the content', async () => {
// To pass this test, fill out the content selector
// in ./src/extractors/custom/www.tmz.com/index.js.
// You may also want to make use of the clean and transform
// options.
const html =
fs.readFileSync('./fixtures/www.tmz.com/1480368537455.html');
const url =
'http://www.tmz.com/2016/11/28/prince-wife-estate-will/';
const { content } =
await Mercury.parse(url, html, { fallback: false });
const $ = cheerio.load(content || '');
const first13 = excerptContent($('*').first().text(), 13);
// Update these values with the expected values from
// the article.
assert.equal(first13, 'Prince was married when he died and wanted all of his money to');
describe('initial test case', () => {
  // A single Mercury.parse call is shared by every test in this group:
  // `result` holds the value returned by that call and each test awaits it.
  let url;
  let result;

  beforeAll(() => {
    url = 'http://www.tmz.com/2016/11/28/prince-wife-estate-will/';
    const fixture = fs.readFileSync('./fixtures/www.tmz.com/1480368537455.html');
    result = Mercury.parse(url, fixture, { fallback: false });
  });

  it('is selected properly', async () => {
    // Sanity check that is expected to pass by default: the extractor
    // chosen for this URL must report the URL's hostname as its `domain`.
    const { domain } = getExtractor(url);
    const { hostname } = URL.parse(url);
    assert.equal(domain, hostname);
  });

  it('returns the title', async () => {
    // Title selector: ./src/extractors/custom/www.tmz.com/index.js.
    const parsed = await result;
    // Expected value is taken from the article.
    assert.equal(parsed.title, 'Prince -- Woman Warns Estate ... Step Aside, I\'m His Wife!');
  });

  it('returns the author', async () => {
    // Author selector: ./src/extractors/custom/www.tmz.com/index.js.
    const parsed = await result;
    // Expected value is taken from the article.
    assert.equal(parsed.author, 'TMZ STAFF');
  });

  it('returns the date_published', async () => {
    // date_published selector: ./src/extractors/custom/www.tmz.com/index.js.
    const parsed = await result;
    // NOTE: the expected value below is knowingly wrong. The error comes
    // from TMZ's very bad markup: the parser currently gets close, but not
    // the correct timezone. Better markup on TMZ's side could fix this.
    assert.equal(parsed.date_published, '2016-11-28T11:00:00.000Z');
  });

  // Disabled dek test, kept for reference. It still uses the older
  // per-test fixture read and parse rather than the shared `result`.
  //
  // it('returns the dek', async () => {
  //   // To pass this test, fill out the dek selector
  //   // in ./src/extractors/custom/www.tmz.com/index.js.
  //   const html =
  //     fs.readFileSync('./fixtures/www.tmz.com/1480368537455.html');
  //   const articleUrl =
  //     'http://www.tmz.com/2016/11/28/prince-wife-estate-will/';
  //
  //   const { dek } =
  //     await Mercury.parse(articleUrl, html, { fallback: false });
  //
  //   // Update these values with the expected values from
  //   // the article.
  //   assert.equal(dek, '');
  // });

  it('returns the lead_image_url', async () => {
    // lead_image_url selector: ./src/extractors/custom/www.tmz.com/index.js.
    const parsed = await result;
    // Expected value is taken from the article.
    assert.equal(parsed.lead_image_url, 'http://ll-media.tmz.com/2016/11/28/1128-prince-getty-03-1200x630.jpg');
  });

  it('returns the content', async () => {
    // Content selector (plus any clean/transform options):
    // ./src/extractors/custom/www.tmz.com/index.js.
    const parsed = await result;
    const $ = cheerio.load(parsed.content || '');
    const leadText = $('*').first().text();
    // Compare a 13-unit excerpt of the first element's text with the
    // expected opening of the article.
    assert.equal(excerptContent(leadText, 13), 'Prince was married when he died and wanted all of his money to');
  });
});
});

@ -8,99 +8,80 @@ import getExtractor from 'extractors/get-extractor';
import { excerptContent } from 'utils/text';
describe('WwwWashingtonpostComExtractor', () => {
it('is selected properly', () => {
  // Sanity check that is expected to pass by default: the extractor
  // chosen for a URL on this domain must report that URL's hostname
  // as its `domain`.
  const articleUrl = 'https://www.washingtonpost.com/politics/trump-foundation-apparently-admits-to-violating-ban-on-self-dealing-new-filing-to-irs-shows/2016/11/22/893f6508-b0a9-11e6-8616-52b15787add0_story.html';

  const { domain } = getExtractor(articleUrl);
  const { hostname } = URL.parse(articleUrl);

  assert.equal(domain, hostname);
});
it('returns the title', async () => {
  // The title selector under test lives in
  // ./src/extractors/custom/www.washingtonpost.com/index.js.
  const pageUrl = 'https://www.washingtonpost.com/politics/trump-foundation-apparently-admits-to-violating-ban-on-self-dealing-new-filing-to-irs-shows/2016/11/22/893f6508-b0a9-11e6-8616-52b15787add0_story.html';
  const fixture = fs.readFileSync('./fixtures/www.washingtonpost.com/1480364838420.html');

  const parsed = await Mercury.parse(pageUrl, fixture, { fallback: false });

  // Expected value is taken from the article.
  assert.equal(parsed.title, 'Trump Foundation admits to violating ban on self-dealing, new filing to IRS shows');
});
it('returns the author', async () => {
  // The author selector under test lives in
  // ./src/extractors/custom/www.washingtonpost.com/index.js.
  const pageUrl = 'https://www.washingtonpost.com/politics/trump-foundation-apparently-admits-to-violating-ban-on-self-dealing-new-filing-to-irs-shows/2016/11/22/893f6508-b0a9-11e6-8616-52b15787add0_story.html';
  const fixture = fs.readFileSync('./fixtures/www.washingtonpost.com/1480364838420.html');

  const parsed = await Mercury.parse(pageUrl, fixture, { fallback: false });

  // Expected value is taken from the article.
  assert.equal(parsed.author, 'David A. Fahrenthold');
});
it('returns the date_published', async () => {
  // The date_published selector under test lives in
  // ./src/extractors/custom/www.washingtonpost.com/index.js.
  const pageUrl = 'https://www.washingtonpost.com/politics/trump-foundation-apparently-admits-to-violating-ban-on-self-dealing-new-filing-to-irs-shows/2016/11/22/893f6508-b0a9-11e6-8616-52b15787add0_story.html';
  const fixture = fs.readFileSync('./fixtures/www.washingtonpost.com/1480364838420.html');

  const parsed = await Mercury.parse(pageUrl, fixture, { fallback: false });

  // Expected value is taken from the article.
  assert.equal(parsed.date_published, '2016-11-22T13:57:00.000Z');
});
it('returns the lead_image_url', async () => {
  // The lead_image_url selector under test lives in
  // ./src/extractors/custom/www.washingtonpost.com/index.js.
  const pageUrl = 'https://www.washingtonpost.com/politics/trump-foundation-apparently-admits-to-violating-ban-on-self-dealing-new-filing-to-irs-shows/2016/11/22/893f6508-b0a9-11e6-8616-52b15787add0_story.html';
  const fixture = fs.readFileSync('./fixtures/www.washingtonpost.com/1480364838420.html');

  const parsed = await Mercury.parse(pageUrl, fixture, { fallback: false });

  // Expected value is taken from the article.
  assert.equal(parsed.lead_image_url, 'https://img.washingtonpost.com/rf/image_1484w/2010-2019/WashingtonPost/2016/11/01/Others/Images/2016-11-01/Trump-HomeSafe-News-131478026931.jpg');
});
it('returns the content', async () => {
// To pass this test, fill out the content selector
// in ./src/extractors/custom/www.washingtonpost.com/index.js.
// You may also want to make use of the clean and transform
// options.
const html =
fs.readFileSync('./fixtures/www.washingtonpost.com/1480364838420.html');
const url =
'https://www.washingtonpost.com/politics/trump-foundation-apparently-admits-to-violating-ban-on-self-dealing-new-filing-to-irs-shows/2016/11/22/893f6508-b0a9-11e6-8616-52b15787add0_story.html';
const { content } =
await Mercury.parse(url, html, { fallback: false });
const $ = cheerio.load(content || '');
const first13 = excerptContent($('*').first().text(), 13);
// Update these values with the expected values from
// the article.
assert.equal(first13, 'Painter Michael Israel, left, poses with Donald and Melania Trump in 2007 at');
describe('initial test case', () => {
  // A single Mercury.parse call is shared by every test in this group:
  // `result` holds the value returned by that call and each test awaits it.
  let url;
  let result;

  beforeAll(() => {
    url = 'https://www.washingtonpost.com/politics/trump-foundation-apparently-admits-to-violating-ban-on-self-dealing-new-filing-to-irs-shows/2016/11/22/893f6508-b0a9-11e6-8616-52b15787add0_story.html';
    const fixture = fs.readFileSync('./fixtures/www.washingtonpost.com/1480364838420.html');
    result = Mercury.parse(url, fixture, { fallback: false });
  });

  it('is selected properly', async () => {
    // Sanity check that is expected to pass by default: the extractor
    // chosen for this URL must report the URL's hostname as its `domain`.
    const { domain } = getExtractor(url);
    const { hostname } = URL.parse(url);
    assert.equal(domain, hostname);
  });

  it('returns the title', async () => {
    // Title selector: ./src/extractors/custom/www.washingtonpost.com/index.js.
    const parsed = await result;
    // Expected value is taken from the article.
    assert.equal(parsed.title, 'Trump Foundation admits to violating ban on self-dealing, new filing to IRS shows');
  });

  it('returns the author', async () => {
    // Author selector: ./src/extractors/custom/www.washingtonpost.com/index.js.
    const parsed = await result;
    // Expected value is taken from the article.
    assert.equal(parsed.author, 'David A. Fahrenthold');
  });

  it('returns the date_published', async () => {
    // date_published selector:
    // ./src/extractors/custom/www.washingtonpost.com/index.js.
    const parsed = await result;
    // Expected value is taken from the article.
    assert.equal(parsed.date_published, '2016-11-22T13:57:00.000Z');
  });

  it('returns the lead_image_url', async () => {
    // lead_image_url selector:
    // ./src/extractors/custom/www.washingtonpost.com/index.js.
    const parsed = await result;
    // Expected value is taken from the article.
    assert.equal(parsed.lead_image_url, 'https://img.washingtonpost.com/rf/image_1484w/2010-2019/WashingtonPost/2016/11/01/Others/Images/2016-11-01/Trump-HomeSafe-News-131478026931.jpg');
  });

  it('returns the content', async () => {
    // Content selector (plus any clean/transform options):
    // ./src/extractors/custom/www.washingtonpost.com/index.js.
    const parsed = await result;
    const $ = cheerio.load(parsed.content || '');
    const leadText = $('*').first().text();
    // Compare a 13-unit excerpt of the first element's text with the
    // expected opening of the article.
    assert.equal(excerptContent(leadText, 13), 'Painter Michael Israel, left, poses with Donald and Melania Trump in 2007 at');
  });
});
});

@ -9,101 +9,82 @@ import { excerptContent } from 'utils/text';
// Rename CustomExtractor
describe('WiredExtractor', () => {
it('is selected properly', () => {
  // Setup reminder: this passes once the extractor in
  // ./src/extractors/custom/www.wired.com/index.js has been renamed
  // (e.g., CustomExtractor => NYTimesExtractor) and registered in
  // src/extractors/all.js.
  const articleUrl = 'https://www.wired.com/2016/09/ode-rosetta-spacecraft-going-die-comet/';

  const { domain } = getExtractor(articleUrl);
  const { hostname } = URL.parse(articleUrl);

  assert.equal(domain, hostname);
});
it('returns the title', async () => {
  // The title selector under test lives in
  // ./src/extractors/custom/www.wired.com/index.js.
  const pageUrl = 'https://www.wired.com/2016/09/ode-rosetta-spacecraft-going-die-comet/';
  const fixture = fs.readFileSync('./fixtures/www.wired.com/1475256747028.html');

  const parsed = await Mercury.parse(pageUrl, fixture, { fallback: false });

  // Expected value is taken from the article.
  assert.equal(parsed.title, 'An Ode to the Rosetta Spacecraft as It Flings Itself Into a Comet');
});
it('returns the author', async () => {
  // The author selector under test lives in
  // ./src/extractors/custom/www.wired.com/index.js.
  const pageUrl = 'https://www.wired.com/2016/09/ode-rosetta-spacecraft-going-die-comet/';
  const fixture = fs.readFileSync('./fixtures/www.wired.com/1475256747028.html');

  const parsed = await Mercury.parse(pageUrl, fixture, { fallback: false });

  // Expected value is taken from the article.
  assert.equal(parsed.author, 'Emma Grey Ellis');
});
it('returns the date_published', async () => {
  // The date_published selector under test lives in
  // ./src/extractors/custom/www.wired.com/index.js.
  const pageUrl = 'https://www.wired.com/2016/09/ode-rosetta-spacecraft-going-die-comet/';
  const fixture = fs.readFileSync('./fixtures/www.wired.com/1475256747028.html');

  const parsed = await Mercury.parse(pageUrl, fixture, { fallback: false });

  // Expected value is taken from the article.
  assert.equal(parsed.date_published, '2016-09-30T07:00:12.000Z');
});
it('returns the lead_image_url', async () => {
  // The lead_image_url selector under test lives in
  // ./src/extractors/custom/www.wired.com/index.js.
  const pageUrl = 'https://www.wired.com/2016/09/ode-rosetta-spacecraft-going-die-comet/';
  const fixture = fs.readFileSync('./fixtures/www.wired.com/1475256747028.html');

  const parsed = await Mercury.parse(pageUrl, fixture, { fallback: false });

  // Expected value is taken from the article.
  assert.equal(parsed.lead_image_url, 'https://www.wired.com/wp-content/uploads/2016/09/Rosetta_impact-1-1200x630.jpg');
});
it('returns the content', async () => {
// To pass this test, fill out the content selector
// in ./src/extractors/custom/www.wired.com/index.js.
// You may also want to make use of the clean and transform
// options.
const html =
fs.readFileSync('./fixtures/www.wired.com/1475256747028.html');
const url =
'https://www.wired.com/2016/09/ode-rosetta-spacecraft-going-die-comet/';
const { content } =
await Mercury.parse(url, html, { fallback: false });
const $ = cheerio.load(content || '');
const first13 = excerptContent($('*').first().text(), 13);
// Update these values with the expected values from
// the article.
assert.equal(first13, 'Today, the European Space Agencys Rosetta spacecraft will engage its thrusters for one');
describe('initial test case', () => {
  // A single Mercury.parse call is shared by every test in this group:
  // `result` holds the value returned by that call and each test awaits it.
  let url;
  let result;

  beforeAll(() => {
    url = 'https://www.wired.com/2016/09/ode-rosetta-spacecraft-going-die-comet/';
    const fixture = fs.readFileSync('./fixtures/www.wired.com/1475256747028.html');
    result = Mercury.parse(url, fixture, { fallback: false });
  });

  it('is selected properly', async () => {
    // Setup reminder: this passes once the extractor in
    // ./src/extractors/custom/www.wired.com/index.js has been renamed
    // (e.g., CustomExtractor => NYTimesExtractor) and registered in
    // src/extractors/all.js.
    const { domain } = getExtractor(url);
    const { hostname } = URL.parse(url);
    assert.equal(domain, hostname);
  });

  it('returns the title', async () => {
    // Title selector: ./src/extractors/custom/www.wired.com/index.js.
    const parsed = await result;
    // Expected value is taken from the article.
    assert.equal(parsed.title, 'An Ode to the Rosetta Spacecraft as It Flings Itself Into a Comet');
  });

  it('returns the author', async () => {
    // Author selector: ./src/extractors/custom/www.wired.com/index.js.
    const parsed = await result;
    // Expected value is taken from the article.
    assert.equal(parsed.author, 'Emma Grey Ellis');
  });

  it('returns the date_published', async () => {
    // date_published selector: ./src/extractors/custom/www.wired.com/index.js.
    const parsed = await result;
    // Expected value is taken from the article.
    assert.equal(parsed.date_published, '2016-09-30T07:00:12.000Z');
  });

  it('returns the lead_image_url', async () => {
    // lead_image_url selector: ./src/extractors/custom/www.wired.com/index.js.
    const parsed = await result;
    // Expected value is taken from the article.
    assert.equal(parsed.lead_image_url, 'https://www.wired.com/wp-content/uploads/2016/09/Rosetta_impact-1-1200x630.jpg');
  });

  it('returns the content', async () => {
    // Content selector (plus any clean/transform options):
    // ./src/extractors/custom/www.wired.com/index.js.
    const parsed = await result;
    const $ = cheerio.load(parsed.content || '');
    const leadText = $('*').first().text();
    // Compare a 13-unit excerpt of the first element's text with the
    // expected opening of the article.
    assert.equal(excerptContent(leadText, 13), 'Today, the European Space Agencys Rosetta spacecraft will engage its thrusters for one');
  });
});
});

@ -9,101 +9,82 @@ import { excerptContent } from 'utils/text';
// Rename CustomExtractor
describe('YahooExtractor', () => {
it('is selected properly', () => {
  // Setup reminder: this passes once the extractor in
  // ./src/extractors/custom/www.yahoo.com/index.js has been renamed
  // (e.g., CustomExtractor => NYTimesExtractor) and registered in
  // src/extractors/all.js.
  const articleUrl = 'https://www.yahoo.com/news/m/1c621104-b0eb-3b4d-9b0a-7bb979f80d7d/ss_clinton-cancels-joint-events.html';

  const { domain } = getExtractor(articleUrl);
  const { hostname } = URL.parse(articleUrl);

  assert.equal(domain, hostname);
});
it('returns the title', async () => {
  // The title selector under test lives in
  // ./src/extractors/custom/www.yahoo.com/index.js.
  const pageUrl = 'https://www.yahoo.com/news/m/1c621104-b0eb-3b4d-9b0a-7bb979f80d7d/ss_clinton-cancels-joint-events.html';
  const fixture = fs.readFileSync('./fixtures/www.yahoo.com/1475529982399.html');

  const parsed = await Mercury.parse(pageUrl, fixture, { fallback: false });

  // Expected value is taken from the article.
  assert.equal(parsed.title, 'Clinton Cancels Joint Events with Sanders');
});
it('returns the author', async () => {
  // The author selector under test lives in
  // ./src/extractors/custom/www.yahoo.com/index.js.
  const pageUrl = 'https://www.yahoo.com/news/m/1c621104-b0eb-3b4d-9b0a-7bb979f80d7d/ss_clinton-cancels-joint-events.html';
  const fixture = fs.readFileSync('./fixtures/www.yahoo.com/1475529982399.html');

  const parsed = await Mercury.parse(pageUrl, fixture, { fallback: false });

  // Expected value is taken from the article.
  assert.equal(parsed.author, 'Fox Nation');
});
it('returns the date_published', async () => {
  // The date_published selector under test lives in
  // ./src/extractors/custom/www.yahoo.com/index.js.
  const pageUrl = 'https://www.yahoo.com/news/m/1c621104-b0eb-3b4d-9b0a-7bb979f80d7d/ss_clinton-cancels-joint-events.html';
  const fixture = fs.readFileSync('./fixtures/www.yahoo.com/1475529982399.html');

  const parsed = await Mercury.parse(pageUrl, fixture, { fallback: false });

  // Expected value is taken from the article.
  assert.equal(parsed.date_published, '2016-10-03T05:00:00.000Z');
});
it('returns the lead_image_url', async () => {
  // The lead_image_url selector under test lives in
  // ./src/extractors/custom/www.yahoo.com/index.js.
  const pageUrl = 'https://www.yahoo.com/news/m/1c621104-b0eb-3b4d-9b0a-7bb979f80d7d/ss_clinton-cancels-joint-events.html';
  const fixture = fs.readFileSync('./fixtures/www.yahoo.com/1475529982399.html');

  const parsed = await Mercury.parse(pageUrl, fixture, { fallback: false });

  // Expected value is taken from the article.
  assert.equal(parsed.lead_image_url, 'https://s.yimg.com/uu/api/res/1.2/tE8CoXSgHD15n5p8wUwGJA--/aD0zMDA7dz02MjQ7c209MTthcHBpZD15dGFjaHlvbg--/http://slingstone.zenfs.com/offnetwork/218c3f97f0b7e1598b6dc9fd10126e22');
});
it('returns the content', async () => {
// To pass this test, fill out the content selector
// in ./src/extractors/custom/www.yahoo.com/index.js.
// You may also want to make use of the clean and transform
// options.
const html =
fs.readFileSync('./fixtures/www.yahoo.com/1475529982399.html');
const url =
'https://www.yahoo.com/news/m/1c621104-b0eb-3b4d-9b0a-7bb979f80d7d/ss_clinton-cancels-joint-events.html';
const { content } =
await Mercury.parse(url, html, { fallback: false });
const $ = cheerio.load(content || '');
const first13 = excerptContent($('*').first().text(), 13);
// Update these values with the expected values from
// the article.
assert.equal(first13, 'The Hillary Clinton campaign has canceled joint appearances with former primary opponent Bernie');
describe('initial test case', () => {
  // A single Mercury.parse call is shared by every test in this group:
  // `result` holds the value returned by that call and each test awaits it.
  let url;
  let result;

  beforeAll(() => {
    url = 'https://www.yahoo.com/news/m/1c621104-b0eb-3b4d-9b0a-7bb979f80d7d/ss_clinton-cancels-joint-events.html';
    const fixture = fs.readFileSync('./fixtures/www.yahoo.com/1475529982399.html');
    result = Mercury.parse(url, fixture, { fallback: false });
  });

  it('is selected properly', async () => {
    // Setup reminder: this passes once the extractor in
    // ./src/extractors/custom/www.yahoo.com/index.js has been renamed
    // (e.g., CustomExtractor => NYTimesExtractor) and registered in
    // src/extractors/all.js.
    const { domain } = getExtractor(url);
    const { hostname } = URL.parse(url);
    assert.equal(domain, hostname);
  });

  it('returns the title', async () => {
    // Title selector: ./src/extractors/custom/www.yahoo.com/index.js.
    const parsed = await result;
    // Expected value is taken from the article.
    assert.equal(parsed.title, 'Clinton Cancels Joint Events with Sanders');
  });

  it('returns the author', async () => {
    // Author selector: ./src/extractors/custom/www.yahoo.com/index.js.
    const parsed = await result;
    // Expected value is taken from the article.
    assert.equal(parsed.author, 'Fox Nation');
  });

  it('returns the date_published', async () => {
    // date_published selector: ./src/extractors/custom/www.yahoo.com/index.js.
    const parsed = await result;
    // Expected value is taken from the article.
    assert.equal(parsed.date_published, '2016-10-03T05:00:00.000Z');
  });

  it('returns the lead_image_url', async () => {
    // lead_image_url selector: ./src/extractors/custom/www.yahoo.com/index.js.
    const parsed = await result;
    // Expected value is taken from the article.
    assert.equal(parsed.lead_image_url, 'https://s.yimg.com/uu/api/res/1.2/tE8CoXSgHD15n5p8wUwGJA--/aD0zMDA7dz02MjQ7c209MTthcHBpZD15dGFjaHlvbg--/http://slingstone.zenfs.com/offnetwork/218c3f97f0b7e1598b6dc9fd10126e22');
  });

  it('returns the content', async () => {
    // Content selector (plus any clean/transform options):
    // ./src/extractors/custom/www.yahoo.com/index.js.
    const parsed = await result;
    const $ = cheerio.load(parsed.content || '');
    const leadText = $('*').first().text();
    // Compare a 13-unit excerpt of the first element's text with the
    // expected opening of the article.
    assert.equal(excerptContent(leadText, 13), 'The Hillary Clinton campaign has canceled joint appearances with former primary opponent Bernie');
  });
});
});

@ -1,5 +1,14 @@
# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.
# yarn lockfile v1
JSONStream@^1.0.3:
version "1.2.1"
resolved "https://registry.yarnpkg.com/JSONStream/-/JSONStream-1.2.1.tgz#32aa5790e799481083b49b4b7fa94e23bae69bf9"
dependencies:
jsonparse "^1.2.0"
through ">=2.2.7 <3"
abab@^1.0.0:
version "1.0.3"
resolved "https://registry.yarnpkg.com/abab/-/abab-1.0.3.tgz#b81de5f7274ec4e756d797cd834f303642724e5d"
@ -8,13 +17,6 @@ abbrev@1, abbrev@1.0.x:
version "1.0.9"
resolved "https://registry.yarnpkg.com/abbrev/-/abbrev-1.0.9.tgz#91b4792588a7738c25f35dd6f63752a2f8776135"
accepts@~1.3.3:
version "1.3.3"
resolved "https://registry.yarnpkg.com/accepts/-/accepts-1.3.3.tgz#c3ca7434938648c3e0d9c1e328dd68b622c284ca"
dependencies:
mime-types "~2.1.11"
negotiator "0.6.1"
accepts@1.1.4:
version "1.1.4"
resolved "https://registry.yarnpkg.com/accepts/-/accepts-1.1.4.tgz#d71c96f7d41d0feda2c38cd14e8a27c04158df4a"
@ -22,6 +24,13 @@ accepts@1.1.4:
mime-types "~2.0.4"
negotiator "0.4.9"
accepts@~1.3.3:
version "1.3.3"
resolved "https://registry.yarnpkg.com/accepts/-/accepts-1.3.3.tgz#c3ca7434938648c3e0d9c1e328dd68b622c284ca"
dependencies:
mime-types "~2.1.11"
negotiator "0.6.1"
acorn-globals@^1.0.4:
version "1.0.9"
resolved "https://registry.yarnpkg.com/acorn-globals/-/acorn-globals-1.0.9.tgz#55bb5e98691507b74579d0513413217c380c54cf"
@ -231,7 +240,7 @@ async-each@^1.0.0:
version "1.0.1"
resolved "https://registry.yarnpkg.com/async-each/-/async-each-1.0.1.tgz#19d386a1d9edc6e7c1c85d388aedbcc56d33602d"
async@^1.4.0, async@^1.4.2, async@1.x:
async@1.x, async@^1.4.0, async@^1.4.2:
version "1.5.2"
resolved "https://registry.yarnpkg.com/async/-/async-1.5.2.tgz#ec6a61ae56480c0c3cb241c95618e20892f9672a"
@ -265,7 +274,7 @@ babel-code-frame@^6.16.0:
esutils "^2.0.2"
js-tokens "^2.0.0"
babel-core@^6.0.0, babel-core@^6.0.14, babel-core@^6.11.4, babel-core@^6.18.0, babel-core@^6.4.0, babel-core@6:
babel-core@6, babel-core@^6.0.0, babel-core@^6.0.14, babel-core@^6.11.4, babel-core@^6.18.0, babel-core@^6.4.0:
version "6.18.2"
resolved "https://registry.yarnpkg.com/babel-core/-/babel-core-6.18.2.tgz#d8bb14dd6986fa4f3566a26ceda3964fa0e04e5b"
dependencies:
@ -917,9 +926,9 @@ browser-pack@^6.0.1:
version "6.0.1"
resolved "https://registry.yarnpkg.com/browser-pack/-/browser-pack-6.0.1.tgz#779887c792eaa1f64a46a22c8f1051cdcd96755f"
dependencies:
JSONStream "^1.0.3"
combine-source-map "~0.7.1"
defined "^1.0.0"
JSONStream "^1.0.3"
through2 "^2.0.0"
umd "^3.0.0"
@ -994,6 +1003,7 @@ browserify@^13.0.0, browserify@^13.1.1:
version "13.1.1"
resolved "https://registry.yarnpkg.com/browserify/-/browserify-13.1.1.tgz#72a2310e2f706ed87db929cf0ee73a5e195d9bb0"
dependencies:
JSONStream "^1.0.3"
assert "~1.3.0"
browser-pack "^6.0.1"
browser-resolve "^1.11.0"
@ -1015,7 +1025,6 @@ browserify@^13.0.0, browserify@^13.1.1:
https-browserify "~0.0.0"
inherits "~2.0.1"
insert-module-globals "^7.0.0"
JSONStream "^1.0.3"
labeled-stream-splicer "^2.0.0"
module-deps "^4.0.8"
os-browserify "~0.1.1"
@ -1251,14 +1260,14 @@ code-point-at@^1.0.0:
version "1.1.0"
resolved "https://registry.yarnpkg.com/code-point-at/-/code-point-at-1.1.0.tgz#0d070b4d043a5bea33a2f1a40e2edb3d9a4ccf77"
colors@^1.1.0:
version "1.1.2"
resolved "https://registry.yarnpkg.com/colors/-/colors-1.1.2.tgz#168a4701756b6a7f51a12ce0c97bfa28c084ed63"
colors@1.0.3:
version "1.0.3"
resolved "https://registry.yarnpkg.com/colors/-/colors-1.0.3.tgz#0433f44d809680fdeb60ed260f1b0c262e82a40b"
colors@^1.1.0:
version "1.1.2"
resolved "https://registry.yarnpkg.com/colors/-/colors-1.1.2.tgz#168a4701756b6a7f51a12ce0c97bfa28c084ed63"
combine-lists@^1.0.0:
version "1.0.1"
resolved "https://registry.yarnpkg.com/combine-lists/-/combine-lists-1.0.1.tgz#458c07e09e0d900fc28b70a3fec2dacd1d2cb7f6"
@ -1280,7 +1289,7 @@ combined-stream@^1.0.5, combined-stream@~1.0.5:
dependencies:
delayed-stream "~1.0.0"
commander@^2.9.0, commander@2.9.0:
commander@2.9.0, commander@^2.9.0:
version "2.9.0"
resolved "https://registry.yarnpkg.com/commander/-/commander-2.9.0.tgz#9c99094176e12240cb22d6c5146098400fe0f7d4"
dependencies:
@ -1306,17 +1315,17 @@ concat-map@0.0.1:
version "0.0.1"
resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b"
concat-stream@^1.4.6, concat-stream@^1.4.7, concat-stream@~1.5.0, concat-stream@~1.5.1:
version "1.5.2"
resolved "https://registry.yarnpkg.com/concat-stream/-/concat-stream-1.5.2.tgz#708978624d856af41a5a741defdd261da752c266"
concat-stream@1.5.0:
version "1.5.0"
resolved "https://registry.yarnpkg.com/concat-stream/-/concat-stream-1.5.0.tgz#53f7d43c51c5e43f81c8fdd03321c631be68d611"
dependencies:
inherits "~2.0.1"
readable-stream "~2.0.0"
typedarray "~0.0.5"
concat-stream@1.5.0:
version "1.5.0"
resolved "https://registry.yarnpkg.com/concat-stream/-/concat-stream-1.5.0.tgz#53f7d43c51c5e43f81c8fdd03321c631be68d611"
concat-stream@^1.4.6, concat-stream@^1.4.7, concat-stream@~1.5.0, concat-stream@~1.5.1:
version "1.5.2"
resolved "https://registry.yarnpkg.com/concat-stream/-/concat-stream-1.5.2.tgz#708978624d856af41a5a741defdd261da752c266"
dependencies:
inherits "~2.0.1"
readable-stream "~2.0.0"
@ -1442,7 +1451,7 @@ css-what@2.1:
version "2.1.0"
resolved "https://registry.yarnpkg.com/css-what/-/css-what-2.1.0.tgz#9467d032c38cfaefb9f2d79501253062f87fa1bd"
"cssom@>= 0.3.0 < 0.4.0", cssom@0.3.x:
cssom@0.3.x, "cssom@>= 0.3.0 < 0.4.0":
version "0.3.1"
resolved "https://registry.yarnpkg.com/cssom/-/cssom-0.3.1.tgz#c9e37ef2490e64f6d1baa10fda852257082c25d3"
@ -1476,21 +1485,21 @@ date-now@^0.1.4:
version "0.1.4"
resolved "https://registry.yarnpkg.com/date-now/-/date-now-0.1.4.tgz#eaf439fd4d4848ad74e5cc7dbef200672b9e345b"
debug@^2.1.1, debug@^2.2.0:
version "2.3.3"
resolved "https://registry.yarnpkg.com/debug/-/debug-2.3.3.tgz#40c453e67e6e13c901ddec317af8986cda9eff8c"
dependencies:
ms "0.7.2"
debug@0.7.4:
version "0.7.4"
resolved "https://registry.yarnpkg.com/debug/-/debug-0.7.4.tgz#06e1ea8082c2cb14e39806e22e2f6f757f92af39"
debug@~2.2.0, debug@2.2.0:
debug@2.2.0, debug@~2.2.0:
version "2.2.0"
resolved "https://registry.yarnpkg.com/debug/-/debug-2.2.0.tgz#f87057e995b1a1f6ae6a4960664137bc56f039da"
dependencies:
ms "0.7.1"
debug@0.7.4:
version "0.7.4"
resolved "https://registry.yarnpkg.com/debug/-/debug-0.7.4.tgz#06e1ea8082c2cb14e39806e22e2f6f757f92af39"
debug@^2.1.1, debug@^2.2.0:
version "2.3.3"
resolved "https://registry.yarnpkg.com/debug/-/debug-2.3.3.tgz#40c453e67e6e13c901ddec317af8986cda9eff8c"
dependencies:
ms "0.7.2"
decamelize@^1.0.0, decamelize@^1.1.1:
version "1.2.0"
@ -1579,14 +1588,14 @@ di@^0.0.1:
version "0.0.1"
resolved "https://registry.yarnpkg.com/di/-/di-0.0.1.tgz#806649326ceaa7caa3306d75d985ea2748ba913c"
diff@^3.0.0:
version "3.0.1"
resolved "https://registry.yarnpkg.com/diff/-/diff-3.0.1.tgz#a52d90cc08956994be00877bff97110062582c35"
diff@1.4.0:
version "1.4.0"
resolved "https://registry.yarnpkg.com/diff/-/diff-1.4.0.tgz#7f28d2eb9ee7b15a97efd89ce63dcfdaa3ccbabf"
diff@^3.0.0:
version "3.0.1"
resolved "https://registry.yarnpkg.com/diff/-/diff-3.0.1.tgz#a52d90cc08956994be00877bff97110062582c35"
diffie-hellman@^5.0.0:
version "5.0.2"
resolved "https://registry.yarnpkg.com/diffie-hellman/-/diffie-hellman-5.0.2.tgz#b5835739270cfe26acf632099fded2a07f209e5e"
@ -1601,16 +1610,16 @@ difflib@adampash/difflib.js:
dependencies:
heap ">= 0.2.0"
doctrine@^1.2.2:
version "1.5.0"
resolved "https://registry.yarnpkg.com/doctrine/-/doctrine-1.5.0.tgz#379dce730f6166f76cefa4e6707a159b02c5a6fa"
doctrine@1.3.x:
version "1.3.0"
resolved "https://registry.yarnpkg.com/doctrine/-/doctrine-1.3.0.tgz#13e75682b55518424276f7c173783456ef913d26"
dependencies:
esutils "^2.0.2"
isarray "^1.0.0"
doctrine@1.3.x:
version "1.3.0"
resolved "https://registry.yarnpkg.com/doctrine/-/doctrine-1.3.0.tgz#13e75682b55518424276f7c173783456ef913d26"
doctrine@^1.2.2:
version "1.5.0"
resolved "https://registry.yarnpkg.com/doctrine/-/doctrine-1.5.0.tgz#379dce730f6166f76cefa4e6707a159b02c5a6fa"
dependencies:
esutils "^2.0.2"
isarray "^1.0.0"
@ -1624,7 +1633,7 @@ dom-serialize@^2.2.0:
extend "^3.0.0"
void-elements "^2.0.0"
dom-serializer@~0.1.0, dom-serializer@0:
dom-serializer@0, dom-serializer@~0.1.0:
version "0.1.0"
resolved "https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-0.1.0.tgz#073c697546ce0780ce23be4a28e293e40bc30c82"
dependencies:
@ -1635,7 +1644,7 @@ domain-browser@~1.1.0:
version "1.1.7"
resolved "https://registry.yarnpkg.com/domain-browser/-/domain-browser-1.1.7.tgz#867aa4b093faa05f1de08c06f4d7b21fdf8698bc"
domelementtype@^1.3.0, domelementtype@1:
domelementtype@1, domelementtype@^1.3.0:
version "1.3.0"
resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-1.3.0.tgz#b17aed82e8ab59e52dd9c19b1756e0fc187204c2"
@ -1649,7 +1658,7 @@ domhandler@^2.3.0:
dependencies:
domelementtype "1"
domutils@^1.5.1, domutils@1.5.1:
domutils@1.5.1, domutils@^1.5.1:
version "1.5.1"
resolved "https://registry.yarnpkg.com/domutils/-/domutils-1.5.1.tgz#dcd8488a26f563d61079e48c9f7b7e32373682cf"
dependencies:
@ -1787,7 +1796,7 @@ es6-set@^0.1.4, es6-set@~0.1.3:
es6-symbol "3"
event-emitter "~0.3.4"
es6-symbol@~3.1, es6-symbol@~3.1.0, es6-symbol@3:
es6-symbol@3, es6-symbol@~3.1, es6-symbol@~3.1.0:
version "3.1.0"
resolved "https://registry.yarnpkg.com/es6-symbol/-/es6-symbol-3.1.0.tgz#94481c655e7a7cad82eba832d97d5433496d7ffa"
dependencies:
@ -1807,11 +1816,11 @@ escape-html@~1.0.3:
version "1.0.3"
resolved "https://registry.yarnpkg.com/escape-html/-/escape-html-1.0.3.tgz#0258eae4d3d0c0974de1c169188ef0051d1d1988"
escape-string-regexp@^1.0.2, escape-string-regexp@^1.0.5, escape-string-regexp@1.0.5:
escape-string-regexp@1.0.5, escape-string-regexp@^1.0.2, escape-string-regexp@^1.0.5:
version "1.0.5"
resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz#1b61c0562190a8dff6ae3bb2cf0200ca130b86d4"
escodegen@^1.6.1, escodegen@1.8.x:
escodegen@1.8.x, escodegen@^1.6.1:
version "1.8.1"
resolved "https://registry.yarnpkg.com/escodegen/-/escodegen-1.8.1.tgz#5a5b53af4693110bebb0867aa3430dd3b70a1018"
dependencies:
@ -1942,7 +1951,7 @@ espree@^3.3.1:
acorn "^4.0.1"
acorn-jsx "^3.0.0"
esprima@^2.6.0, esprima@^2.7.1, esprima@2.7.x:
esprima@2.7.x, esprima@^2.6.0, esprima@^2.7.1:
version "2.7.3"
resolved "https://registry.yarnpkg.com/esprima/-/esprima-2.7.3.tgz#96e3b70d5779f6ad49cd032673d1c312767ba581"
@ -2318,7 +2327,7 @@ glob-parent@^2.0.0:
dependencies:
is-glob "^2.0.0"
glob@^5.0.15, glob@5.x:
glob@5.x, glob@^5.0.15:
version "5.0.15"
resolved "https://registry.yarnpkg.com/glob/-/glob-5.0.15.tgz#1bc936b9e02f4a603fcc222ecf7633d30b8b93b1"
dependencies:
@ -2328,9 +2337,9 @@ glob@^5.0.15, glob@5.x:
once "^1.3.0"
path-is-absolute "^1.0.0"
glob@^7.0.0, glob@^7.0.3, glob@^7.0.5, glob@^7.1.1:
version "7.1.1"
resolved "https://registry.yarnpkg.com/glob/-/glob-7.1.1.tgz#805211df04faaf1c63a3600306cdf5ade50b2ec8"
glob@7.0.5:
version "7.0.5"
resolved "https://registry.yarnpkg.com/glob/-/glob-7.0.5.tgz#b4202a69099bbb4d292a7c1b95b6682b67ebdc95"
dependencies:
fs.realpath "^1.0.0"
inflight "^1.0.4"
@ -2339,9 +2348,9 @@ glob@^7.0.0, glob@^7.0.3, glob@^7.0.5, glob@^7.1.1:
once "^1.3.0"
path-is-absolute "^1.0.0"
glob@7.0.5:
version "7.0.5"
resolved "https://registry.yarnpkg.com/glob/-/glob-7.0.5.tgz#b4202a69099bbb4d292a7c1b95b6682b67ebdc95"
glob@^7.0.0, glob@^7.0.3, glob@^7.0.5, glob@^7.1.1:
version "7.1.1"
resolved "https://registry.yarnpkg.com/glob/-/glob-7.1.1.tgz#805211df04faaf1c63a3600306cdf5ade50b2ec8"
dependencies:
fs.realpath "^1.0.0"
inflight "^1.0.4"
@ -2570,7 +2579,7 @@ inflight@^1.0.4:
once "^1.3.0"
wrappy "1"
inherits@^2.0.1, inherits@~2.0.0, inherits@~2.0.1, inherits@2, inherits@2.0.3:
inherits@2, inherits@2.0.3, inherits@^2.0.1, inherits@~2.0.0, inherits@~2.0.1:
version "2.0.3"
resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.3.tgz#633c2c83e3da42a502f52466022480f4208261de"
@ -2629,10 +2638,10 @@ insert-module-globals@^7.0.0:
version "7.0.1"
resolved "https://registry.yarnpkg.com/insert-module-globals/-/insert-module-globals-7.0.1.tgz#c03bf4e01cb086d5b5e5ace8ad0afe7889d638c3"
dependencies:
JSONStream "^1.0.3"
combine-source-map "~0.7.1"
concat-stream "~1.5.1"
is-buffer "^1.1.0"
JSONStream "^1.0.3"
lexical-scope "^1.2.0"
process "~0.11.0"
through2 "^2.0.0"
@ -2791,14 +2800,14 @@ is-utf8@^0.2.0:
version "0.2.1"
resolved "https://registry.yarnpkg.com/is-utf8/-/is-utf8-0.2.1.tgz#4b0da1442104d1b336340e80797e865cf39f7d72"
isarray@^1.0.0, isarray@~1.0.0, isarray@1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/isarray/-/isarray-1.0.0.tgz#bb935d48582cba168c06834957a54a3e07124f11"
isarray@~0.0.1, isarray@0.0.1:
isarray@0.0.1, isarray@~0.0.1:
version "0.0.1"
resolved "https://registry.yarnpkg.com/isarray/-/isarray-0.0.1.tgz#8a18acfca9a8f4177e09abfc6038939b05d1eedf"
isarray@1.0.0, isarray@^1.0.0, isarray@~1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/isarray/-/isarray-1.0.0.tgz#bb935d48582cba168c06834957a54a3e07124f11"
isbinaryfile@^3.0.0:
version "3.0.1"
resolved "https://registry.yarnpkg.com/isbinaryfile/-/isbinaryfile-3.0.1.tgz#6e99573675372e841a0520c036b41513d783e79e"
@ -3110,7 +3119,7 @@ js-tokens@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/js-tokens/-/js-tokens-2.0.0.tgz#79903f5563ee778cc1162e6dcf1a0027c97f9cb5"
js-yaml@^3.5.1, js-yaml@3.x:
js-yaml@3.x, js-yaml@^3.5.1:
version "3.7.0"
resolved "https://registry.yarnpkg.com/js-yaml/-/js-yaml-3.7.0.tgz#5c967ddd837a9bfdca5f2de84253abe8a1c03b80"
dependencies:
@ -3204,13 +3213,6 @@ jsonpointer@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/jsonpointer/-/jsonpointer-4.0.0.tgz#6661e161d2fc445f19f98430231343722e1fcbd5"
JSONStream@^1.0.3:
version "1.2.1"
resolved "https://registry.yarnpkg.com/JSONStream/-/JSONStream-1.2.1.tgz#32aa5790e799481083b49b4b7fa94e23bae69bf9"
dependencies:
jsonparse "^1.2.0"
through ">=2.2.7 <3"
jsprim@^1.2.2:
version "1.3.1"
resolved "https://registry.yarnpkg.com/jsprim/-/jsprim-1.3.1.tgz#2a7256f70412a29ee3670aaca625994c4dcff252"
@ -3640,7 +3642,7 @@ mime-types@~2.0.4:
dependencies:
mime-db "~1.12.0"
mime@^1.3.4, mime@1.3.4:
mime@1.3.4, mime@^1.3.4:
version "1.3.4"
resolved "https://registry.yarnpkg.com/mime/-/mime-1.3.4.tgz#115f9e3b6b3daf2959983cb38f149a2d40eb5d53"
@ -3648,7 +3650,7 @@ minimalistic-assert@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/minimalistic-assert/-/minimalistic-assert-1.0.0.tgz#702be2dda6b37f4836bcb3f5db56641b64a1d3d3"
minimatch@^3.0.0, minimatch@^3.0.2, minimatch@^3.0.3, "minimatch@2 || 3":
"minimatch@2 || 3", minimatch@^3.0.0, minimatch@^3.0.2, minimatch@^3.0.3:
version "3.0.3"
resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.0.3.tgz#2a4e4090b96b2db06a9d7df01055a62a77c9b774"
dependencies:
@ -3660,23 +3662,13 @@ minimatch@2.x:
dependencies:
brace-expansion "^1.0.0"
minimist@^1.1.0, minimist@^1.1.1, minimist@^1.2.0, minimist@1.2.0:
version "1.2.0"
resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.0.tgz#a35008b20f41383eec1fb914f4cd5df79a264284"
minimist@~0.0.1:
version "0.0.10"
resolved "https://registry.yarnpkg.com/minimist/-/minimist-0.0.10.tgz#de3f98543dbf96082be48ad1a0c7cda836301dcf"
minimist@0.0.8:
minimist@0.0.8, minimist@~0.0.1:
version "0.0.8"
resolved "https://registry.yarnpkg.com/minimist/-/minimist-0.0.8.tgz#857fcabfc3397d2625b8228262e86aa7a011b05d"
mkdirp@^0.5.0, mkdirp@^0.5.1, "mkdirp@>=0.5 0", mkdirp@~0.5.1, mkdirp@0.5.1, mkdirp@0.5.x:
version "0.5.1"
resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-0.5.1.tgz#30057438eac6cf7f8c4767f38648d6697d75c903"
dependencies:
minimist "0.0.8"
minimist@1.2.0, minimist@^1.1.0, minimist@^1.1.1, minimist@^1.2.0:
version "1.2.0"
resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.0.tgz#a35008b20f41383eec1fb914f4cd5df79a264284"
mkdirp@0.5.0:
version "0.5.0"
@ -3684,6 +3676,12 @@ mkdirp@0.5.0:
dependencies:
minimist "0.0.8"
mkdirp@0.5.1, mkdirp@0.5.x, "mkdirp@>=0.5 0", mkdirp@^0.5.0, mkdirp@^0.5.1, mkdirp@~0.5.1:
version "0.5.1"
resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-0.5.1.tgz#30057438eac6cf7f8c4767f38648d6697d75c903"
dependencies:
minimist "0.0.8"
mocha@^3.1.2:
version "3.1.2"
resolved "https://registry.yarnpkg.com/mocha/-/mocha-3.1.2.tgz#51f93b432bf7e1b175ffc22883ccd0be32dba6b5"
@ -3704,6 +3702,7 @@ module-deps@^4.0.8:
version "4.0.8"
resolved "https://registry.yarnpkg.com/module-deps/-/module-deps-4.0.8.tgz#55fd70623399706c3288bef7a609ff1e8c0ed2bb"
dependencies:
JSONStream "^1.0.3"
browser-resolve "^1.7.0"
cached-path-relative "^1.0.0"
concat-stream "~1.5.0"
@ -3711,7 +3710,6 @@ module-deps@^4.0.8:
detective "^4.0.0"
duplexer2 "^0.1.2"
inherits "^2.0.1"
JSONStream "^1.0.3"
parents "^1.0.0"
readable-stream "^2.0.2"
resolve "^1.1.3"
@ -3730,14 +3728,10 @@ moment-timezone:
dependencies:
moment ">= 2.6.0"
moment@^2.14.1:
"moment@>= 2.6.0", moment@^2.14.1:
version "2.16.0"
resolved "https://registry.yarnpkg.com/moment/-/moment-2.16.0.tgz#f38f2c97c9889b0ee18fc6cc392e1e443ad2da8e"
"moment@>= 2.6.0":
version "2.17.0"
resolved "https://registry.yarnpkg.com/moment/-/moment-2.17.0.tgz#a4c292e02aac5ddefb29a6eed24f51938dd3b74f"
ms@0.7.1:
version "0.7.1"
resolved "https://registry.yarnpkg.com/ms/-/ms-0.7.1.tgz#9cd13c03adbff25b65effde7ce864ee952017098"
@ -3836,7 +3830,7 @@ node-uuid@~1.4.7:
version "1.4.7"
resolved "https://registry.yarnpkg.com/node-uuid/-/node-uuid-1.4.7.tgz#6da5a17668c4b3dd59623bda11cf7fa4c1f60a6f"
nopt@~3.0.6, nopt@3.x:
nopt@3.x, nopt@~3.0.6:
version "3.0.6"
resolved "https://registry.yarnpkg.com/nopt/-/nopt-3.0.6.tgz#c6465dbf08abcd4db359317f79ac68a646b28ff9"
dependencies:
@ -3907,7 +3901,7 @@ on-finished@~2.3.0:
dependencies:
ee-first "1.1.1"
once@^1.3.0, once@1.x:
once@1.x, once@^1.3.0:
version "1.4.0"
resolved "https://registry.yarnpkg.com/once/-/once-1.4.0.tgz#583b1aa775961d4b113ac17d9c50baef9dd76bd1"
dependencies:
@ -4103,7 +4097,7 @@ phantomjs-polyfill-string-includes@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/phantomjs-polyfill-string-includes/-/phantomjs-polyfill-string-includes-1.0.0.tgz#ea180d4bbc24b8d83e477f8ee8893efabcb29393"
phantomjs-prebuilt@^2.1.7:
phantomjs-prebuilt@^2.1.13, phantomjs-prebuilt@^2.1.7:
version "2.1.13"
resolved "https://registry.yarnpkg.com/phantomjs-prebuilt/-/phantomjs-prebuilt-2.1.13.tgz#66556ad9e965d893ca5a7dc9e763df7e8697f76d"
dependencies:
@ -4204,30 +4198,26 @@ public-encrypt@^4.0.0:
parse-asn1 "^5.0.0"
randombytes "^2.0.1"
punycode@^1.3.2, punycode@^1.4.1:
version "1.4.1"
resolved "https://registry.yarnpkg.com/punycode/-/punycode-1.4.1.tgz#c0d5a63b2718800ad8e1eb0fa5269c84dd41845e"
punycode@1.3.2:
version "1.3.2"
resolved "https://registry.yarnpkg.com/punycode/-/punycode-1.3.2.tgz#9653a036fb7c1ee42342f2325cceefea3926c48d"
punycode@^1.3.2, punycode@^1.4.1:
version "1.4.1"
resolved "https://registry.yarnpkg.com/punycode/-/punycode-1.4.1.tgz#c0d5a63b2718800ad8e1eb0fa5269c84dd41845e"
qjobs@^1.1.4:
version "1.1.5"
resolved "https://registry.yarnpkg.com/qjobs/-/qjobs-1.1.5.tgz#659de9f2cf8dcc27a1481276f205377272382e73"
qs@^6.0.2, qs@~6.3.0:
version "6.3.0"
resolved "https://registry.yarnpkg.com/qs/-/qs-6.3.0.tgz#f403b264f23bc01228c74131b407f18d5ea5d442"
qs@~6.2.0:
version "6.2.1"
resolved "https://registry.yarnpkg.com/qs/-/qs-6.2.1.tgz#ce03c5ff0935bc1d9d69a9f14cbd18e568d67625"
qs@6.2.0:
qs@6.2.0, qs@^6.0.2, qs@~6.2.0:
version "6.2.0"
resolved "https://registry.yarnpkg.com/qs/-/qs-6.2.0.tgz#3b7848c03c2dece69a9522b0fae8c4126d745f3b"
qs@~6.3.0:
version "6.3.0"
resolved "https://registry.yarnpkg.com/qs/-/qs-6.3.0.tgz#f403b264f23bc01228c74131b407f18d5ea5d442"
querystring-es3@~0.2.0:
version "0.2.1"
resolved "https://registry.yarnpkg.com/querystring-es3/-/querystring-es3-0.2.1.tgz#9ec61f79049875707d69414596fd907a4d711e73"
@ -4458,9 +4448,9 @@ request@^2.55.0, request@^2.72.0, request@^2.75.0:
tunnel-agent "~0.4.1"
uuid "^3.0.0"
request@~2.74.0:
version "2.74.0"
resolved "https://registry.yarnpkg.com/request/-/request-2.74.0.tgz#7693ca768bbb0ea5c8ce08c084a45efa05b892ab"
request@czardoz/request:
version "2.74.1"
resolved "https://codeload.github.com/czardoz/request/tar.gz/6ebb4651b72bee7292e54943adfa6b503465adfb"
dependencies:
aws-sign2 "~0.6.0"
aws4 "^1.2.1"
@ -4484,9 +4474,9 @@ request@~2.74.0:
tough-cookie "~2.3.0"
tunnel-agent "~0.4.1"
request@czardoz/request:
version "2.74.1"
resolved "https://codeload.github.com/czardoz/request/tar.gz/6ebb4651b72bee7292e54943adfa6b503465adfb"
request@~2.74.0:
version "2.74.0"
resolved "https://registry.yarnpkg.com/request/-/request-2.74.0.tgz#7693ca768bbb0ea5c8ce08c084a45efa05b892ab"
dependencies:
aws-sign2 "~0.6.0"
aws4 "^1.2.1"
@ -4537,7 +4527,7 @@ resolve-from@^1.0.0:
version "1.0.1"
resolved "https://registry.yarnpkg.com/resolve-from/-/resolve-from-1.0.1.tgz#26cbfe935d1aeeeabb29bc3fe5aeb01e93d44226"
resolve@^1.1.3, resolve@^1.1.4, resolve@^1.1.6, resolve@^1.1.7, resolve@1.1.7, resolve@1.1.x:
resolve@1.1.7, resolve@1.1.x, resolve@^1.1.3, resolve@^1.1.4, resolve@^1.1.6, resolve@^1.1.7:
version "1.1.7"
resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.1.7.tgz#203114d82ad2c5ed9e8e0411b3932875e889e97b"
@ -4554,7 +4544,7 @@ right-align@^0.1.1:
dependencies:
align-text "^0.1.1"
rimraf@^2.2.8, rimraf@^2.3.3, rimraf@^2.4.3, rimraf@^2.4.4, rimraf@~2.5.1, rimraf@~2.5.4, rimraf@2:
rimraf@2, rimraf@^2.2.8, rimraf@^2.3.3, rimraf@^2.4.3, rimraf@^2.4.4, rimraf@~2.5.1, rimraf@~2.5.4:
version "2.5.4"
resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-2.5.4.tgz#96800093cbf1a0c86bd95b4625467535c29dfa04"
dependencies:
@ -4657,7 +4647,7 @@ sax@^1.1.4:
version "1.2.1"
resolved "https://registry.yarnpkg.com/sax/-/sax-1.2.1.tgz#7b8e656190b228e81a66aea748480d828cd2d37a"
semver@^5.1.0, semver@^5.3.0, semver@~5.3.0, "semver@2 || 3 || 4 || 5":
"semver@2 || 3 || 4 || 5", semver@^5.1.0, semver@^5.3.0, semver@~5.3.0:
version "5.3.0"
resolved "https://registry.yarnpkg.com/semver/-/semver-5.3.0.tgz#9b2ce5d3de02d17c6012ad326aa6b4d0cf54f94f"
@ -4911,10 +4901,6 @@ stream-splicer@^2.0.0:
inherits "^2.0.1"
readable-stream "^2.0.2"
string_decoder@~0.10.0, string_decoder@~0.10.x:
version "0.10.31"
resolved "https://registry.yarnpkg.com/string_decoder/-/string_decoder-0.10.31.tgz#62e203bc41766c6c28c9fc84301dab1c5310fa94"
string-direction@^0.1.2:
version "0.1.2"
resolved "https://registry.yarnpkg.com/string-direction/-/string-direction-0.1.2.tgz#3d8453e7274a2e44a142b3dc8449dfb64d9ade3a"
@ -4938,6 +4924,10 @@ string.prototype.codepointat@^0.2.0:
version "0.2.0"
resolved "https://registry.yarnpkg.com/string.prototype.codepointat/-/string.prototype.codepointat-0.2.0.tgz#6b26e9bd3afcaa7be3b4269b526de1b82000ac78"
string_decoder@~0.10.0, string_decoder@~0.10.x:
version "0.10.31"
resolved "https://registry.yarnpkg.com/string_decoder/-/string_decoder-0.10.31.tgz#62e203bc41766c6c28c9fc84301dab1c5310fa94"
stringstream@~0.0.4:
version "0.0.5"
resolved "https://registry.yarnpkg.com/stringstream/-/stringstream-0.0.5.tgz#4e484cd4de5a0bbbee18e46307710a8a81621878"
@ -4968,16 +4958,16 @@ subarg@^1.0.0:
dependencies:
minimist "^1.1.0"
supports-color@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-2.0.0.tgz#535d045ce6b6363fa40117084629995e9df324c7"
supports-color@^3.1.0, supports-color@^3.1.2, supports-color@3.1.2:
supports-color@3.1.2, supports-color@^3.1.0, supports-color@^3.1.2:
version "3.1.2"
resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-3.1.2.tgz#72a262894d9d408b956ca05ff37b2ed8a6e2a2d5"
dependencies:
has-flag "^1.0.0"
supports-color@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-2.0.0.tgz#535d045ce6b6363fa40117084629995e9df324c7"
"symbol-tree@>= 3.1.0 < 4.0.0":
version "3.1.4"
resolved "https://registry.yarnpkg.com/symbol-tree/-/symbol-tree-3.1.4.tgz#02b279348d337debc39694c5c95f882d448a312a"
@ -5046,10 +5036,6 @@ throttleit@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/throttleit/-/throttleit-1.0.0.tgz#9e785836daf46743145a5984b6268d828528ac6c"
through@^2.3.6, "through@>=2.2.7 <3":
version "2.3.8"
resolved "https://registry.yarnpkg.com/through/-/through-2.3.8.tgz#0dd4c9ffaabc357960b1b724115d7e0e86a2e1f5"
through2@^2.0.0:
version "2.0.1"
resolved "https://registry.yarnpkg.com/through2/-/through2-2.0.1.tgz#384e75314d49f32de12eebb8136b8eb6b5d59da9"
@ -5057,24 +5043,28 @@ through2@^2.0.0:
readable-stream "~2.0.0"
xtend "~4.0.0"
"through@>=2.2.7 <3", through@^2.3.6:
version "2.3.8"
resolved "https://registry.yarnpkg.com/through/-/through-2.3.8.tgz#0dd4c9ffaabc357960b1b724115d7e0e86a2e1f5"
timers-browserify@^1.0.1:
version "1.4.2"
resolved "https://registry.yarnpkg.com/timers-browserify/-/timers-browserify-1.4.2.tgz#c9c58b575be8407375cb5e2462dacee74359f41d"
dependencies:
process "~0.11.0"
tmp@^0.0.29:
version "0.0.29"
resolved "https://registry.yarnpkg.com/tmp/-/tmp-0.0.29.tgz#f25125ff0dd9da3ccb0c2dd371ee1288bb9128c0"
dependencies:
os-tmpdir "~1.0.1"
tmp@0.0.28:
version "0.0.28"
resolved "https://registry.yarnpkg.com/tmp/-/tmp-0.0.28.tgz#172735b7f614ea7af39664fa84cf0de4e515d120"
dependencies:
os-tmpdir "~1.0.1"
tmp@^0.0.29:
version "0.0.29"
resolved "https://registry.yarnpkg.com/tmp/-/tmp-0.0.29.tgz#f25125ff0dd9da3ccb0c2dd371ee1288bb9128c0"
dependencies:
os-tmpdir "~1.0.1"
tmpl@1.0.x:
version "1.0.4"
resolved "https://registry.yarnpkg.com/tmpl/-/tmpl-1.0.4.tgz#23640dd7b42d00433911140820e5cf440e521dd1"
@ -5123,14 +5113,14 @@ type-check@~0.3.2:
dependencies:
prelude-ls "~1.1.2"
type-detect@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/type-detect/-/type-detect-1.0.0.tgz#762217cc06db258ec48908a1298e8b95121e8ea2"
type-detect@0.1.1:
version "0.1.1"
resolved "https://registry.yarnpkg.com/type-detect/-/type-detect-0.1.1.tgz#0ba5ec2a885640e470ea4e8505971900dac58822"
type-detect@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/type-detect/-/type-detect-1.0.0.tgz#762217cc06db258ec48908a1298e8b95121e8ea2"
type-is@~1.6.13:
version "1.6.14"
resolved "https://registry.yarnpkg.com/type-is/-/type-is-1.6.14.tgz#e219639c17ded1ca0789092dd54a03826b817cb2"
@ -5167,10 +5157,16 @@ umd@^3.0.0:
version "3.0.1"
resolved "http://registry.npmjs.org/umd/-/umd-3.0.1.tgz#8ae556e11011f63c2596708a8837259f01b3d60e"
unpipe@~1.0.0, unpipe@1.0.0:
unpipe@1.0.0, unpipe@~1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/unpipe/-/unpipe-1.0.0.tgz#b2bf4ee8514aae6165b4817829d21b2ef49904ec"
url@adampash/node-url#feat-remove-punycode:
version "0.11.1"
resolved "https://codeload.github.com/adampash/node-url/tar.gz/01ff22ebee841a0053330d45f221371b41effbbf"
dependencies:
querystring "0.2.0"
url@~0.11.0:
version "0.11.0"
resolved "https://registry.yarnpkg.com/url/-/url-0.11.0.tgz#3838e97cfc60521eb73c525a8e55bfdd9e2e28f1"
@ -5178,12 +5174,6 @@ url@~0.11.0:
punycode "1.3.2"
querystring "0.2.0"
url@adampash/node-url#feat-remove-punycode:
version "0.11.1"
resolved "https://codeload.github.com/adampash/node-url/tar.gz/01ff22ebee841a0053330d45f221371b41effbbf"
dependencies:
querystring "0.2.0"
user-home@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/user-home/-/user-home-2.0.0.tgz#9c70bfd8169bc1dcbf48604e0f04b8b49cde9e9f"
@ -5204,7 +5194,7 @@ util-deprecate@~1.0.1:
version "1.0.2"
resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf"
util@~0.10.1, util@0.10.3:
util@0.10.3, util@~0.10.1:
version "0.10.3"
resolved "https://registry.yarnpkg.com/util/-/util-0.10.3.tgz#7afb1afe50805246489e3db7fe0ed379336ac0f9"
dependencies:
@ -5308,13 +5298,17 @@ wide-align@^1.1.0:
dependencies:
string-width "^1.0.1"
window-size@0.1.0:
version "0.1.0"
resolved "https://registry.yarnpkg.com/window-size/-/window-size-0.1.0.tgz#5438cd2ea93b202efa3a19fe8887aee7c94f9c9d"
window-size@^0.2.0:
version "0.2.0"
resolved "https://registry.yarnpkg.com/window-size/-/window-size-0.2.0.tgz#b4315bb4214a3d7058ebeee892e13fa24d98b075"
window-size@0.1.0:
version "0.1.0"
resolved "https://registry.yarnpkg.com/window-size/-/window-size-0.1.0.tgz#5438cd2ea93b202efa3a19fe8887aee7c94f9c9d"
wordwrap@0.0.2:
version "0.0.2"
resolved "https://registry.yarnpkg.com/wordwrap/-/wordwrap-0.0.2.tgz#b79669bb42ecb409f83d583cad52ca17eaa1643f"
wordwrap@^1.0.0, wordwrap@~1.0.0:
version "1.0.0"
@ -5324,10 +5318,6 @@ wordwrap@~0.0.2:
version "0.0.3"
resolved "https://registry.yarnpkg.com/wordwrap/-/wordwrap-0.0.3.tgz#a3d5da6cd5c0bc0008d37234bbaf1bed63059107"
wordwrap@0.0.2:
version "0.0.2"
resolved "https://registry.yarnpkg.com/wordwrap/-/wordwrap-0.0.2.tgz#b79669bb42ecb409f83d583cad52ca17eaa1643f"
worker-farm@^1.3.1:
version "1.3.1"
resolved "https://registry.yarnpkg.com/worker-farm/-/worker-farm-1.3.1.tgz#4333112bb49b17aa050b87895ca6b2cacf40e5ff"
@ -5372,7 +5362,7 @@ xmlhttprequest-ssl@1.5.1:
version "1.5.1"
resolved "https://registry.yarnpkg.com/xmlhttprequest-ssl/-/xmlhttprequest-ssl-1.5.1.tgz#3b7741fea4a86675976e908d296d4445961faa67"
xtend@^4.0.0, "xtend@>=4.0.0 <4.1.0-0", xtend@~4.0.0:
"xtend@>=4.0.0 <4.1.0-0", xtend@^4.0.0, xtend@~4.0.0:
version "4.0.1"
resolved "https://registry.yarnpkg.com/xtend/-/xtend-4.0.1.tgz#a5c6d532be656e23db820efb943a1f04998d63af"
@ -5450,4 +5440,3 @@ yauzl@2.4.1:
yeast@0.1.2:
version "0.1.2"
resolved "https://registry.yarnpkg.com/yeast/-/yeast-0.1.2.tgz#008e06d8094320c372dbc2f8ed76a0ca6c8ac419"

Loading…
Cancel
Save