release: 1.0.3 (#62)

feat-only-run-changes 1.0.3
Adam Pash 8 years ago committed by GitHub
parent 861c5f0dcb
commit a710efd2d5

@ -1,5 +1,18 @@
# Mercury Parser Changelog
### 1.0.3 (Dec 9, 2016)
##### Commits
* [[`861c5f0dcb`](https://github.com/postlight/mercury-parser/commit/861c5f0dcb)] - **feat**: bustle extractor (#60) (Janet)
* [[`06397a4360`](https://github.com/postlight/mercury-parser/commit/06397a4360)] - **feat**: browser-friendly selector for medium (#61) (Adam Pash)
* [[`3297ab079d`](https://github.com/postlight/mercury-parser/commit/3297ab079d)] - **feat**: bloomberg extractor (#59) (Adam Pash)
* [[`e55e9da534`](https://github.com/postlight/mercury-parser/commit/e55e9da534)] - **feat**: sbnation extractor (#55) (Janet)
* [[`8070e4790b`](https://github.com/postlight/mercury-parser/commit/8070e4790b)] - **test**: streamlined guardian tests w/new single-extraction (#58) (Adam Pash)
* [[`bdb751fb53`](https://github.com/postlight/mercury-parser/commit/bdb751fb53)] - **feat**: more cleaning for wired (#56) (Adam Pash)
* [[`e7e41bd242`](https://github.com/postlight/mercury-parser/commit/e7e41bd242)] - **feat**: the guardian custom extractor (#41) (Janet)
* [[`332f85928f`](https://github.com/postlight/mercury-parser/commit/332f85928f)] - **release**: 1.0.2 (#54) (Adam Pash)
### 1.0.2 (Dec 6, 2016)
##### Commits

177
dist/mercury.js vendored

@ -422,7 +422,8 @@ var REMOVE_ATTR_SELECTORS = REMOVE_ATTRS.map(function (selector) {
return '[' + selector + ']';
});
var REMOVE_ATTR_LIST = REMOVE_ATTRS.join(',');
var WHITELIST_ATTRS = ['src', 'srcset', 'href', 'class', 'id', 'alt'];
var WHITELIST_ATTRS = ['src', 'srcset', 'href', 'class', 'id', 'alt', 'xlink:href', 'width', 'height'];
var WHITELIST_ATTRS_RE = new RegExp('^(' + WHITELIST_ATTRS.join('|') + ')$', 'i');
// removeEmpty
@ -2183,7 +2184,7 @@ var WiredExtractor = {
// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean: ['.visually-hidden']
clean: ['.visually-hidden', 'figcaption img.photo']
},
date_published: {
@ -2661,7 +2662,7 @@ var MediumExtractor = {
},
content: {
selectors: ['.section-content'],
selectors: ['.section-content', 'article > div > section'],
// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
@ -3128,6 +3129,170 @@ var WwwYoutubeComExtractor = {
}
};
// Custom extractor configuration for www.theguardian.com.
// Each field lists the selectors tried for that field. A nested
// [selector, attribute] pair presumably reads the named attribute of the
// matched element rather than its text -- confirm against the selector engine.
var WwwTheguardianComExtractor = {
  domain: 'www.theguardian.com',

  title: { selectors: ['.content__headline'] },

  author: { selectors: ['p.byline'] },

  date_published: {
    selectors: [
      ['meta[name="article:published_time"]', 'value']
    ]
  },

  dek: { selectors: ['.content__standfirst'] },

  lead_image_url: {
    selectors: [
      ['meta[name="og:image"]', 'value']
    ]
  },

  content: {
    selectors: ['.content__article-body'],

    // Hook for rewriting selected content before it is consumed (e.g.
    // unusual lazy-loaded images). Nothing needs to be transformed here.
    transforms: {},

    // Anything in the result matching one of these selectors is removed.
    clean: ['.hide-on-mobile', '.inline-icon']
  }
};
// Custom extractor configuration for www.sbnation.com.
// Each field lists the selectors tried for that field. A nested
// [selector, attribute] pair presumably reads the named attribute of the
// matched element rather than its text -- confirm against the selector engine.
var WwwSbnationComExtractor = {
  domain: 'www.sbnation.com',

  title: { selectors: ['h1.c-page-title'] },

  author: {
    selectors: [
      ['meta[name="author"]', 'value']
    ]
  },

  date_published: {
    selectors: [
      ['meta[name="article:published_time"]', 'value']
    ]
  },

  dek: { selectors: ['h2.c-entry-summary.p-dek'] },

  lead_image_url: {
    selectors: [
      ['meta[name="og:image"]', 'value']
    ]
  },

  content: {
    selectors: ['div.c-entry-content'],

    // Hook for rewriting selected content before it is consumed (e.g.
    // unusual lazy-loaded images). Nothing needs to be transformed here.
    transforms: {},

    // Anything in the result matching one of these selectors is removed;
    // currently nothing is stripped.
    clean: []
  }
};
// Custom extractor configuration for www.bloomberg.com.
// Selector lists cover three page templates: normal articles, /graphics/
// and /news/. A nested [selector, attribute] pair presumably reads the
// named attribute of the matched element rather than its text -- confirm
// against the selector engine.
var WwwBloombergComExtractor = {
  domain: 'www.bloomberg.com',

  title: {
    selectors: [
      '.lede-headline',          // normal articles
      'h1.article-title',        // /graphics/ template
      'h1.lede-text-only__hed'   // /news/ template
    ]
  },

  author: {
    selectors: [
      ['meta[name="parsely-author"]', 'value'],
      '.byline-details__link',
      '.bydek',                  // /graphics/ template
      '.author'                  // /news/ template
    ]
  },

  date_published: {
    selectors: [
      ['time.published-at', 'datetime'],
      ['time[datetime]', 'datetime'],
      ['meta[name="date"]', 'value'],
      ['meta[name="parsely-pub-date"]', 'value']
    ]
  },

  // No dek selectors are defined for this site.
  dek: { selectors: [] },

  lead_image_url: {
    selectors: [
      ['meta[name="og:image"]', 'value']
    ]
  },

  content: {
    selectors: [
      '.article-body__content',
      // /graphics/ template. NOTE(review): the single-element array form is
      // kept exactly as authored; it presumably has a distinct meaning to
      // the selector engine -- verify before flattening it to a string.
      ['section.copy-block'],
      '.body-copy'               // /news/ template
    ],

    // Hook for rewriting selected content before it is consumed (e.g.
    // unusual lazy-loaded images). Nothing needs to be transformed here.
    transforms: {},

    // Anything in the result matching one of these selectors is removed.
    clean: ['.inline-newsletter', '.page-ad']
  }
};
// Custom extractor configuration for www.bustle.com.
// Each field lists the selectors tried for that field. A nested
// [selector, attribute] pair presumably reads the named attribute of the
// matched element rather than its text -- confirm against the selector engine.
// This extractor defines no `dek` field.
var WwwBustleComExtractor = {
  domain: 'www.bustle.com',

  title: { selectors: ['h1.post-page__title'] },

  author: { selectors: ['div.content-meta__author'] },

  date_published: {
    selectors: [
      ['time.content-meta__published-date[datetime]', 'datetime']
    ]
  },

  lead_image_url: {
    selectors: [
      ['meta[name="og:image"]', 'value']
    ]
  },

  content: {
    selectors: ['.post-page__body'],

    // Hook for rewriting selected content before it is consumed (e.g.
    // unusual lazy-loaded images). Nothing needs to be transformed here.
    transforms: {},

    // Anything in the result matching one of these selectors is removed;
    // currently nothing is stripped.
    clean: []
  }
};
var CustomExtractors = Object.freeze({
@ -3157,7 +3322,11 @@ var CustomExtractors = Object.freeze({
WwwThevergeComExtractor: WwwThevergeComExtractor,
WwwCnnComExtractor: WwwCnnComExtractor,
WwwAolComExtractor: WwwAolComExtractor,
WwwYoutubeComExtractor: WwwYoutubeComExtractor
WwwYoutubeComExtractor: WwwYoutubeComExtractor,
WwwTheguardianComExtractor: WwwTheguardianComExtractor,
WwwSbnationComExtractor: WwwSbnationComExtractor,
WwwBloombergComExtractor: WwwBloombergComExtractor,
WwwBustleComExtractor: WwwBustleComExtractor
});
var Extractors = _Object$keys(CustomExtractors).reduce(function (acc, key) {

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@ -1,6 +1,6 @@
{
"name": "mercury-parser",
"version": "1.0.2",
"version": "1.0.3",
"description": "",
"repository": "github:postlight/mercury-parser",
"main": "./dist/mercury.js",

Loading…
Cancel
Save