release: 1.0.1 (#48)

pull/51/head 1.0.1
Adam Pash 8 years ago committed by GitHub
parent f9902cfa05
commit edcb7295d1

@ -0,0 +1,8 @@
* [[`19ed035382`](https://github.com/postlight/mercury-parser/commit/19ed035382)] - **release**: 1.0.1 (Adam Pash)
* [[`f9902cfa05`](https://github.com/postlight/mercury-parser/commit/f9902cfa05)] - **Fix**: extension bugs (#47) (Adam Pash)
* [[`16860f1d85`](https://github.com/postlight/mercury-parser/commit/16860f1d85)] - **feat**: improved nyt parser (#46) (Adam Pash)
* [[`d0453efbf8`](https://github.com/postlight/mercury-parser/commit/d0453efbf8)] - **feat**: improvements for nyer magazine articles (#45) (Adam Pash)
* [[`00f8965c1f`](https://github.com/postlight/mercury-parser/commit/00f8965c1f)] - **fix**: cleaning up deks (#44) (Adam Pash)
* [[`b415d1d37c`](https://github.com/postlight/mercury-parser/commit/b415d1d37c)] - **feat**: aol custom extractor (#42) (Janet)
* [[`4cc3b68b5e`](https://github.com/postlight/mercury-parser/commit/4cc3b68b5e)] - **feat**: remove footer links (#40) (Matt)
* [[`e9a36d6ebd`](https://github.com/postlight/mercury-parser/commit/e9a36d6ebd)] - **release**: 1.0.0 so we can start doing proper releases (#39) (Adam Pash)

104
dist/mercury.js vendored

@ -2061,12 +2061,16 @@ var NYTimesExtractor = {
}
},
clean: ['.ad', 'header#story-header', '.story-body-1 .lede.video', '.visually-hidden', '#newsletter-promo', '.promo', '.comments-button', '.hidden', '.comments', '.supplemental', '.nocontent']
clean: ['.ad', 'header#story-header', '.story-body-1 .lede.video', '.visually-hidden', '#newsletter-promo', '.promo', '.comments-button', '.hidden', '.comments', '.supplemental', '.nocontent', '.story-footer-links']
},
date_published: null,
date_published: {
selectors: [['meta[name="article:published"]', 'value']]
},
lead_image_url: null,
lead_image_url: {
selectors: [['meta[name="og:image"]', 'value']]
},
dek: null,
@ -2088,7 +2092,7 @@ var TheAtlanticExtractor = {
},
content: {
selectors: ['.article-body'],
selectors: [['.article-cover figure.lead-img', '.article-body'], '.article-body'],
// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
@ -2097,7 +2101,7 @@ var TheAtlanticExtractor = {
// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean: []
clean: ['.partner-box']
},
date_published: {
@ -2106,8 +2110,6 @@ var TheAtlanticExtractor = {
lead_image_url: null,
dek: null,
next_page_url: null,
excerpt: null
@ -2140,7 +2142,9 @@ var NewYorkerExtractor = {
},
date_published: {
selectors: [['meta[name="article:published_time"]', 'value']]
selectors: [['meta[name="article:published_time"]', 'value'], ['time[itemProp="datePublished"]', 'content']],
timezone: 'America/New_York'
},
lead_image_url: {
@ -2148,7 +2152,7 @@ var NewYorkerExtractor = {
},
dek: {
selectors: [['meta[name="og:description"]', 'value']]
selectors: ['.dek', 'h2.dek']
},
next_page_url: null,
@ -2191,7 +2195,7 @@ var WiredExtractor = {
},
dek: {
selectors: [['meta[name="og:description"]', 'value']]
selectors: []
},
next_page_url: null,
@ -2234,7 +2238,7 @@ var MSNExtractor = {
},
dek: {
selectors: [['meta[name="description"]', 'value']]
selectors: []
},
next_page_url: null,
@ -2279,7 +2283,9 @@ var YahooExtractor = {
},
dek: {
selectors: [['meta[name="og:description"]', 'value']]
selectors: [
// enter dek selectors
]
},
next_page_url: null,
@ -2301,20 +2307,30 @@ var BuzzfeedExtractor = {
},
content: {
selectors: ['#buzz_sub_buzz'],
selectors: [['.longform_custom_header_media', '#buzz_sub_buzz'], '#buzz_sub_buzz'],
defaultCleaner: false,
// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms: {
h2: 'b'
h2: 'b',
'div.longform_custom_header_media': function divLongform_custom_header_media($node) {
if ($node.has('img') && $node.has('.longform_header_image_source')) {
return 'figure';
}
return null;
},
'figure.longform_custom_header_media .longform_header_image_source': 'figcaption'
},
// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean: ['.instapaper_ignore', '.suplist_list_hide .buzz_superlist_item .buzz_superlist_number_inline', '.share-box']
clean: ['.instapaper_ignore', '.suplist_list_hide .buzz_superlist_item .buzz_superlist_number_inline', '.share-box', '.print']
},
date_published: {
@ -2326,7 +2342,7 @@ var BuzzfeedExtractor = {
},
dek: {
selectors: [['meta[name="description"]', 'value']]
selectors: []
},
next_page_url: null,
@ -2369,7 +2385,7 @@ var WikiaExtractor = {
},
dek: {
selectors: [['meta[name="og:description"]', 'value']]
selectors: []
},
next_page_url: null,
@ -2455,7 +2471,7 @@ var PoliticoExtractor = {
},
dek: {
selectors: [['meta[name="description"]', 'value']]
selectors: []
},
next_page_url: null,
@ -2556,7 +2572,7 @@ var BroadwayWorldExtractor = {
},
dek: {
selectors: [['meta[name="og:description"]', 'value']]
selectors: []
},
next_page_url: {
@ -2615,7 +2631,7 @@ var ApartmentTherapyExtractor = {
},
dek: {
selectors: [['meta[name=description]', 'value']]
selectors: []
},
next_page_url: {
@ -2758,7 +2774,7 @@ var WwwWashingtonpostComExtractor = {
},
dek: {
selectors: [['meta[name="og:description"]', 'value']]
selectors: []
},
lead_image_url: {
@ -2979,8 +2995,6 @@ var WwwCnnComExtractor = {
selectors: [['meta[name="pubdate"]', 'value']]
},
dek: null,
lead_image_url: {
selectors: [['meta[name="og:image"]', 'value']]
},
@ -3025,6 +3039,47 @@ var WwwCnnComExtractor = {
}
};
/**
 * Extractor configuration keyed to the `www.aol.com` domain.
 *
 * Each field holds a `selectors` list. Entries are either a plain CSS
 * selector string or a two-element array of selector plus a second string
 * (presumably the attribute to read from the matched node -- confirm
 * against the extractor runtime).
 */
var WwwAolComExtractor = {
  domain: 'www.aol.com',

  title: { selectors: ['h1.p-article__title'] },

  author: { selectors: [['meta[name="author"]', 'value']] },

  date_published: {
    selectors: ['.p-article__byline__date'],
    // Carried alongside the byline selector; presumably the zone used to
    // interpret the byline date text -- confirm against the date parser.
    timezone: 'America/New_York'
  },

  // No dek selectors have been configured for this site yet.
  dek: { selectors: [] },

  lead_image_url: { selectors: [['meta[name="og:image"]', 'value']] },

  content: {
    selectors: ['.article-content'],

    // Hook for rewriting selected content before it is consumed
    // (e.g. unusual lazy-loaded images). Nothing is rewritten here.
    transforms: {},

    // Selectors whose matches should be stripped from the result.
    // Nothing is stripped here.
    clean: []
  }
};
var CustomExtractors = Object.freeze({
@ -3052,7 +3107,8 @@ var CustomExtractors = Object.freeze({
NewrepublicComExtractor: NewrepublicComExtractor,
MoneyCnnComExtractor: MoneyCnnComExtractor,
WwwThevergeComExtractor: WwwThevergeComExtractor,
WwwCnnComExtractor: WwwCnnComExtractor
WwwCnnComExtractor: WwwCnnComExtractor,
WwwAolComExtractor: WwwAolComExtractor
});
var Extractors = _Object$keys(CustomExtractors).reduce(function (acc, key) {

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@ -1,6 +1,6 @@
{
"name": "mercury-parser",
"version": "1.0.0",
"version": "1.0.1",
"description": "",
"repository": "github:postlight/mercury-parser",
"main": "./dist/mercury.js",
@ -10,6 +10,7 @@
"lint-fix-quiet": "eslint --fix --quiet",
"build": "yarn lint && rollup -c && yarn test:build",
"build:web": "yarn lint && rollup -c rollup.config.web.js && yarn test:build:web",
"release": "yarn build && yarn build:web",
"build:generator": "rollup -c scripts/rollup.config.js",
"test_build": "rollup -c",
"test": "yarn test:node && yarn test:web",

Loading…
Cancel
Save