release: 1.0.5 (#136)

7 years ago · 601b0fac16
parent 6bd6278a07
commit 601b0fac16
5 changed files with 120 additions and 17 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -1,5 +1,16 @@
 # Mercury Parser Changelog

+### 1.0.5 (Feb 1, 2017)
+
+##### Commits
+
+* [[`6bd6278a07`](https://github.com/postlight/mercury-parser/commit/6bd6278a07)] - **feat**: custom parser for wh blog (#130) (Adam Pash) 
+* [[`aa682d71e8`](https://github.com/postlight/mercury-parser/commit/aa682d71e8)] - **fix**: medium bug (#129) (Adam Pash) 
+* [[`4e049de61a`](https://github.com/postlight/mercury-parser/commit/4e049de61a)] - **fix**: i put a bad comment in .gitattributes (#125) (Adam Pash) 
+* [[`8aa215c4c2`](https://github.com/postlight/mercury-parser/commit/8aa215c4c2)] - **chore**: marking html fixtures as "vendored" (#124) (Adam Pash) 
+* [[`31eb4f9222`](https://github.com/postlight/mercury-parser/commit/31eb4f9222)] - **Feat**: LinkedIn parser (#123) (Adam Pash) 
+* [[`dbc706410b`](https://github.com/postlight/mercury-parser/commit/dbc706410b)] - **release**: 1.0.4 (#122) (Adam Pash) 
+
 ### 1.0.4 (Jan 26, 2017)

 ##### Commits
--- a/dist/mercury.js
+++ b/dist/mercury.js
@ -2662,7 +2662,7 @@ var MediumExtractor = {
  },

  content: {
-    selectors: ['.section-content', 'article > div > section'],
+    selectors: [['.section-content'], '.section-content', 'article > div > section'],

    // Does anything in the content you selected need to be transformed
    // before it's consumable content? E.g., unusual lazy-loaded images
@ -2681,9 +2681,19 @@ var MediumExtractor = {

          $node.attr('src', 'https://www.youtube.com/embed/' + youtubeId);
          var $parent = $node.parents('figure');
-          $parent.prepend($node.clone());
-          $node.remove();
+          var $caption = $parent.find('figcaption');
+          $parent.empty().append([$node, $caption]);
        }
+      },
+
+      // rewrite figures to pull out image and caption, remove rest
+      figure: function figure($node) {
+        // ignore if figure has an iframe
+        if ($node.find('iframe').length > 0) return;
+
+        var $img = $node.find('img').slice(-1)[0];
+        var $caption = $node.find('figcaption');
+        $node.empty().append([$img, $caption]);
      }
    },

@ -4183,6 +4193,86 @@ var FortuneComExtractor = {
  }
 };

+var WwwLinkedinComExtractor = { // custom extractor config for www.linkedin.com; registered in CustomExtractors
+  domain: 'www.linkedin.com',

+  title: {
+    selectors: ['.article-title', 'h1']
+  },

+  author: {
+    selectors: [['meta[name="article:author"]', 'value'], '.entity-name a[rel=author]'] // NOTE(review): a [selector, name] pair presumably reads that attribute from the match — confirm
+  },

+  date_published: {
+    selectors: [['time[itemprop="datePublished"]', 'datetime']],

+    timezone: 'America/Los_Angeles' // NOTE(review): presumably used when the extracted datetime carries no zone — confirm
+  },

+  dek: {
+    selectors: [
+      // intentionally empty: no dek selectors are defined for this domain
+    ]
+  },

+  lead_image_url: {
+    selectors: [['meta[name="og:image"]', 'value']]
+  },

+  content: {
+    selectors: [['header figure', '.prose'], '.prose'], // the first entry groups two selectors, the second is '.prose' alone; NOTE(review): grouped entries presumably select all listed nodes together — confirm

+    // Does anything in the selected content need to be transformed
+    // before it's consumable content? E.g., unusual lazy-loaded images.
+    // No transforms are defined for this domain.
+    transforms: {},

+    // Is there anything in the result that shouldn't be there?
+    // The clean selectors will remove anything that matches from
+    // the result
+    clean: ['.entity-image']
+  }
+};
+
+var ObamawhitehouseArchivesGovExtractor = { // custom extractor config for obamawhitehouse.archives.gov; registered in CustomExtractors
+  domain: 'obamawhitehouse.archives.gov',

+  supportedDomains: ['whitehouse.gov'], // NOTE(review): presumably extra hostnames this extractor also handles — confirm

+  title: {
+    selectors: ['h1', '.pane-node-title']
+  },

+  author: {
+    selectors: ['.blog-author-link', '.node-person-name-link']
+  },

+  date_published: {
+    selectors: [['meta[name="article:published_time"]', 'value']] // NOTE(review): a [selector, name] pair presumably reads that attribute from the match — confirm
+  },

+  dek: {
+    selectors: ['.field-name-field-forall-summary']
+  },

+  lead_image_url: {
+    selectors: [['meta[name="og:image"]', 'value']]
+  },

+  content: {
+    selectors: ['.pane-node-field-forall-body'],

+    // Does anything in the selected content need to be transformed
+    // before it's consumable content? E.g., unusual lazy-loaded images.
+    // No transforms are defined for this domain.
+    transforms: {},

+    // Is there anything in the result that shouldn't be there?
+    // The clean selectors will remove anything that matches from
+    // the result. None are defined for this domain.
+    clean: []
+  }
+};
+


 var CustomExtractors = Object.freeze({
@ -4241,7 +4331,9 @@ var CustomExtractors = Object.freeze({
 	WwwNjComExtractor: WwwNjComExtractor,
 	WwwInquisitrComExtractor: WwwInquisitrComExtractor,
 	WwwNbcnewsComExtractor: WwwNbcnewsComExtractor,
-	FortuneComExtractor: FortuneComExtractor
+	FortuneComExtractor: FortuneComExtractor,
+	WwwLinkedinComExtractor: WwwLinkedinComExtractor,
+	ObamawhitehouseArchivesGovExtractor: ObamawhitehouseArchivesGovExtractor
 });

 var Extractors = _Object$keys(CustomExtractors).reduce(function (acc, key) {
--- a/dist/mercury.js.map
+++ b/dist/mercury.js.map
--- a/dist/mercury.web.js
+++ b/dist/mercury.web.js
--- a/package.json
+++ b/package.json
@ -1,6 +1,6 @@
 {
  "name": "mercury-parser",
-  "version": "1.0.4",
+  "version": "1.0.5",
  "description": "",
  "repository": "github:postlight/mercury-parser",
  "main": "./dist/mercury.js",