You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
mercury-parser/src/extractors/generic/content/scoring/fixtures/get-weight.js

665 lines
17 KiB
JavaScript

const HTML = {
// getWeight fixtures
positiveId: `
<div id="entry">
<p>Ooo good one</p>
</div>
`,
negativeId: `
<div id="adbox">
<p>Ooo good one</p>
</div>
`,
positiveClass: `
<div class="entry">
<p>Ooo good one</p>
</div>
`,
negativeClass: `
<div id="comment ad">
<p>Ooo good one</p>
</div>
`,
positiveIdAndClass: `
<div id="article" class="entry">
<p>Ooo good one</p>
</div>
`,
positiveIdNegClass: `
<div id="article" class="adbox">
<p>Ooo good one</p>
</div>
`,
positivePhotoClass: `
<div class="figure">
<p>Ooo good one</p>
</div>
`,
positiveIdAndPhoto: `
<div id="article" class="figure">
<p>Ooo good one</p>
</div>
`,
entryContentAsset: `
<div id="foo" class="entry-content-asset">
<p>Ooo good one</p>
</div>
`,
// stripUnlikelyCandidates
noMatches: `
<div id="foo">
<p>Ooo good one</p>
</div>
`,
whitelistMatch: {
before: `
<div class="header">Stuff</div>
<div class="article">
<p>Ooo good one</p>
</div>
`,
after: `
<div class="article">
<p>Ooo good one</p>
</div>
`,
},
whiteAndBlack: {
before: `
<div class="article adbox">
<p>Ooo good one</p>
</div>
`,
after: `
<div class="article adbox">
<p>Ooo good one</p>
</div>
`,
},
whiteInsideBlack: {
before: `
<div>
<div class="adbox">
<div class="article">
<p>Ooo good one</p>
</div>
</div>
<div>Something unrelated</div>
</div>
`,
after: `
<div>
<div>Something unrelated</div>
</div>
`,
},
// brsToPs
singleBr: {
before: `
<div class="article adbox">
<br>
<p>Ooo good one</p>
</div>
`,
after: `
<div class="article adbox">
<br>
<p>Ooo good one</p>
</div>
`,
},
doubleBrs: {
before: `
<div class="article adbox">
<br />
<br />
<p>Ooo good one</p>
</div>
`,
after: `
<div class="article adbox">
<p> </p><p>Ooo good one</p>
</div>
`,
},
severalBrs: {
before: `
<div class="article adbox">
<br />
<br />
<br />
<br />
<br />
<p>Ooo good one</p>
</div>
`,
after: `
<div class="article adbox">
<p> </p><p>Ooo good one</p>
</div>
`,
},
brsInP: {
before: `
<p>
Here is some text
<br />
<br />
Here is more text
</p>
`,
after: `
<p>
Here is some text
<p>
Here is more text
</p></p>
`,
},
paragraphize: {
before: `
<p>
Here is some text
<br />
Here is more text
<span>And also this</span>
</p>
`,
after: `
<p>
Here is some text
<p>
Here is more text
<span>And also this</span>
</p></p>
`,
},
paragraphizeBlock: {
before: `
<p>
Here is some text
<br />
Here is more text
<div>And also this</div>
</p>
`,
after: `
<p>
Here is some text
<p>
Here is more text
</p><div>And also this</div>
</p>
`,
},
// convertToParagraphs
convertToParagraphs: {
before: `
<p>
Here is some text
<span>This should remain in a p</span>
<br />
<br />
This should be wrapped in a p
<div>This should become a p</div>
</p>
<span>This should become a p</span>
`,
after: `
<p>
Here is some text
<span>This should remain in a p</span>
<p>
This should be wrapped in a p
</p><p>This should become a p</p>
</p> <p>This should become a p</p>
`,
},
// linkDensity
linkDensity5: `
<div><p>Some text!</p><p><a href="">Some text!</a></p> </div>
`,
linkDensity1: `
<div><p><a href="">Some text!</a></p></div>
`,
linkDensity0: `
<div><p><a href=""></a></p></div>
`,
feat: implemented extractBestNode functionality Squashed commit of the following: commit 9af554dd975ff1778ed70c71fa9bde667fc5f880 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 15:19:32 2016 -0400 feat: add cleanHeaders commit 0dfea98eedc4f97fcbd78866322595c705e20521 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 14:30:49 2016 -0400 fix: scoring parent nodes recursively commit b6e5897a694adeb81e25a905aba72c0f45a8cc94 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 12:47:24 2016 -0400 feat: extract clean node up and running commit fb652c5db13db6bce7271efd68ba4b20515e9549 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 09:57:21 2016 -0400 chore: added test for p tags with nested tags (e.g., img, iframe) commit 731d0a2e4d89121dfafad195e9d0911805c4f8e4 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 17:50:33 2016 -0400 feat: extact clean node integrates most functions commit 322bc6534d30feb7c1c08d3813132badc6286b40 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:46:04 2016 -0400 feat: removing empty nodes as defined in constants commit f1d38932ea12a865814d2326970031fcb8515baa Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:33:31 2016 -0400 feat: cleaning attributes from nodes commit 0aa73ada6854af0ecd504bfe3d926a9524787ab5 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:09:56 2016 -0400 feat: cleaning h1s from text commit 12d4a309246285c278ce7765e4fbaa8271bb5889 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:52:03 2016 -0400 feat: removing spacer images commit 4e74ff830cc67586560f6fc72e2cfa432a3a2647 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:38:49 2016 -0400 feat: stripping unwanted html from doc commit c774166e90169fd0c1aa89898d3f7a975e82bf0a Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:17:32 2016 -0400 feat: removing small images, height attribute from images commit 3a8642f42cda451669c832482c5e1611b1ff2ea9 Author: Adam Pash 
<adam.pash@gmail.com> Date: Mon Aug 29 12:57:45 2016 -0400 feat: rewrite top level commit a1c03e779234b0aea02206d92ec3dcc15758507e Author: Adam Pash <adam.pash@gmail.com> Date: Fri Aug 26 17:34:36 2016 -0400 in a weird place rn
8 years ago
// rewriteTopLevel
rewriteHTMLBody: {
before: `
<html><body><div><p><a href="">Wow how about that</a></p></div></body></html>
`,
after: `
<div><div><div><p><a href="">Wow how about that</a></p></div></div></div>
`,
feat: implemented extractBestNode functionality Squashed commit of the following: commit 9af554dd975ff1778ed70c71fa9bde667fc5f880 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 15:19:32 2016 -0400 feat: add cleanHeaders commit 0dfea98eedc4f97fcbd78866322595c705e20521 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 14:30:49 2016 -0400 fix: scoring parent nodes recursively commit b6e5897a694adeb81e25a905aba72c0f45a8cc94 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 12:47:24 2016 -0400 feat: extract clean node up and running commit fb652c5db13db6bce7271efd68ba4b20515e9549 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 09:57:21 2016 -0400 chore: added test for p tags with nested tags (e.g., img, iframe) commit 731d0a2e4d89121dfafad195e9d0911805c4f8e4 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 17:50:33 2016 -0400 feat: extact clean node integrates most functions commit 322bc6534d30feb7c1c08d3813132badc6286b40 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:46:04 2016 -0400 feat: removing empty nodes as defined in constants commit f1d38932ea12a865814d2326970031fcb8515baa Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:33:31 2016 -0400 feat: cleaning attributes from nodes commit 0aa73ada6854af0ecd504bfe3d926a9524787ab5 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:09:56 2016 -0400 feat: cleaning h1s from text commit 12d4a309246285c278ce7765e4fbaa8271bb5889 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:52:03 2016 -0400 feat: removing spacer images commit 4e74ff830cc67586560f6fc72e2cfa432a3a2647 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:38:49 2016 -0400 feat: stripping unwanted html from doc commit c774166e90169fd0c1aa89898d3f7a975e82bf0a Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:17:32 2016 -0400 feat: removing small images, height attribute from images commit 3a8642f42cda451669c832482c5e1611b1ff2ea9 Author: Adam Pash 
<adam.pash@gmail.com> Date: Mon Aug 29 12:57:45 2016 -0400 feat: rewrite top level commit a1c03e779234b0aea02206d92ec3dcc15758507e Author: Adam Pash <adam.pash@gmail.com> Date: Fri Aug 26 17:34:36 2016 -0400 in a weird place rn
8 years ago
},
// cleanImages
cleanSmallImages: {
before: `
<div>
<img width="5" height="5" />
<img width="50" />
</div>
`,
after: `
<div>
<img width="50">
</div>
`,
feat: implemented extractBestNode functionality Squashed commit of the following: commit 9af554dd975ff1778ed70c71fa9bde667fc5f880 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 15:19:32 2016 -0400 feat: add cleanHeaders commit 0dfea98eedc4f97fcbd78866322595c705e20521 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 14:30:49 2016 -0400 fix: scoring parent nodes recursively commit b6e5897a694adeb81e25a905aba72c0f45a8cc94 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 12:47:24 2016 -0400 feat: extract clean node up and running commit fb652c5db13db6bce7271efd68ba4b20515e9549 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 09:57:21 2016 -0400 chore: added test for p tags with nested tags (e.g., img, iframe) commit 731d0a2e4d89121dfafad195e9d0911805c4f8e4 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 17:50:33 2016 -0400 feat: extact clean node integrates most functions commit 322bc6534d30feb7c1c08d3813132badc6286b40 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:46:04 2016 -0400 feat: removing empty nodes as defined in constants commit f1d38932ea12a865814d2326970031fcb8515baa Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:33:31 2016 -0400 feat: cleaning attributes from nodes commit 0aa73ada6854af0ecd504bfe3d926a9524787ab5 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:09:56 2016 -0400 feat: cleaning h1s from text commit 12d4a309246285c278ce7765e4fbaa8271bb5889 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:52:03 2016 -0400 feat: removing spacer images commit 4e74ff830cc67586560f6fc72e2cfa432a3a2647 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:38:49 2016 -0400 feat: stripping unwanted html from doc commit c774166e90169fd0c1aa89898d3f7a975e82bf0a Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:17:32 2016 -0400 feat: removing small images, height attribute from images commit 3a8642f42cda451669c832482c5e1611b1ff2ea9 Author: Adam Pash 
<adam.pash@gmail.com> Date: Mon Aug 29 12:57:45 2016 -0400 feat: rewrite top level commit a1c03e779234b0aea02206d92ec3dcc15758507e Author: Adam Pash <adam.pash@gmail.com> Date: Fri Aug 26 17:34:36 2016 -0400 in a weird place rn
8 years ago
},
cleanHeight: {
before: `
<div>
<img width="50" height="50" />
</div>
`,
after: `
<div>
<img width="50">
</div>
`,
feat: implemented extractBestNode functionality Squashed commit of the following: commit 9af554dd975ff1778ed70c71fa9bde667fc5f880 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 15:19:32 2016 -0400 feat: add cleanHeaders commit 0dfea98eedc4f97fcbd78866322595c705e20521 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 14:30:49 2016 -0400 fix: scoring parent nodes recursively commit b6e5897a694adeb81e25a905aba72c0f45a8cc94 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 12:47:24 2016 -0400 feat: extract clean node up and running commit fb652c5db13db6bce7271efd68ba4b20515e9549 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 09:57:21 2016 -0400 chore: added test for p tags with nested tags (e.g., img, iframe) commit 731d0a2e4d89121dfafad195e9d0911805c4f8e4 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 17:50:33 2016 -0400 feat: extact clean node integrates most functions commit 322bc6534d30feb7c1c08d3813132badc6286b40 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:46:04 2016 -0400 feat: removing empty nodes as defined in constants commit f1d38932ea12a865814d2326970031fcb8515baa Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:33:31 2016 -0400 feat: cleaning attributes from nodes commit 0aa73ada6854af0ecd504bfe3d926a9524787ab5 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:09:56 2016 -0400 feat: cleaning h1s from text commit 12d4a309246285c278ce7765e4fbaa8271bb5889 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:52:03 2016 -0400 feat: removing spacer images commit 4e74ff830cc67586560f6fc72e2cfa432a3a2647 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:38:49 2016 -0400 feat: stripping unwanted html from doc commit c774166e90169fd0c1aa89898d3f7a975e82bf0a Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:17:32 2016 -0400 feat: removing small images, height attribute from images commit 3a8642f42cda451669c832482c5e1611b1ff2ea9 Author: Adam Pash 
<adam.pash@gmail.com> Date: Mon Aug 29 12:57:45 2016 -0400 feat: rewrite top level commit a1c03e779234b0aea02206d92ec3dcc15758507e Author: Adam Pash <adam.pash@gmail.com> Date: Fri Aug 26 17:34:36 2016 -0400 in a weird place rn
8 years ago
},
cleanSpacer: {
before: `
<div>
<img src="/foo/bar/baz/spacer.png" />
<img src="/foo/bar/baz/normal.png" />
<p>Some text</p>
</div>
`,
after: `
<div>
<img src="/foo/bar/baz/normal.png">
<p>Some text</p>
</div>
`,
feat: implemented extractBestNode functionality Squashed commit of the following: commit 9af554dd975ff1778ed70c71fa9bde667fc5f880 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 15:19:32 2016 -0400 feat: add cleanHeaders commit 0dfea98eedc4f97fcbd78866322595c705e20521 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 14:30:49 2016 -0400 fix: scoring parent nodes recursively commit b6e5897a694adeb81e25a905aba72c0f45a8cc94 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 12:47:24 2016 -0400 feat: extract clean node up and running commit fb652c5db13db6bce7271efd68ba4b20515e9549 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 09:57:21 2016 -0400 chore: added test for p tags with nested tags (e.g., img, iframe) commit 731d0a2e4d89121dfafad195e9d0911805c4f8e4 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 17:50:33 2016 -0400 feat: extact clean node integrates most functions commit 322bc6534d30feb7c1c08d3813132badc6286b40 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:46:04 2016 -0400 feat: removing empty nodes as defined in constants commit f1d38932ea12a865814d2326970031fcb8515baa Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:33:31 2016 -0400 feat: cleaning attributes from nodes commit 0aa73ada6854af0ecd504bfe3d926a9524787ab5 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:09:56 2016 -0400 feat: cleaning h1s from text commit 12d4a309246285c278ce7765e4fbaa8271bb5889 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:52:03 2016 -0400 feat: removing spacer images commit 4e74ff830cc67586560f6fc72e2cfa432a3a2647 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:38:49 2016 -0400 feat: stripping unwanted html from doc commit c774166e90169fd0c1aa89898d3f7a975e82bf0a Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:17:32 2016 -0400 feat: removing small images, height attribute from images commit 3a8642f42cda451669c832482c5e1611b1ff2ea9 Author: Adam Pash 
<adam.pash@gmail.com> Date: Mon Aug 29 12:57:45 2016 -0400 feat: rewrite top level commit a1c03e779234b0aea02206d92ec3dcc15758507e Author: Adam Pash <adam.pash@gmail.com> Date: Fri Aug 26 17:34:36 2016 -0400 in a weird place rn
8 years ago
},
// stripJunkTags
stripsJunk: {
before: `
<div>
<style>.red { color: 'red'; }</style>
<title>WOW</title>
<link rel="asdflkjawef" />
<p>What an article</p>
<script type="text/javascript">alert('hi!');</script>
<noscript>Don't got it</noscript>
<hr />
</div>
`,
after: `
<div>
<p>What an article</p>
</div>
`,
feat: implemented extractBestNode functionality Squashed commit of the following: commit 9af554dd975ff1778ed70c71fa9bde667fc5f880 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 15:19:32 2016 -0400 feat: add cleanHeaders commit 0dfea98eedc4f97fcbd78866322595c705e20521 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 14:30:49 2016 -0400 fix: scoring parent nodes recursively commit b6e5897a694adeb81e25a905aba72c0f45a8cc94 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 12:47:24 2016 -0400 feat: extract clean node up and running commit fb652c5db13db6bce7271efd68ba4b20515e9549 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 09:57:21 2016 -0400 chore: added test for p tags with nested tags (e.g., img, iframe) commit 731d0a2e4d89121dfafad195e9d0911805c4f8e4 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 17:50:33 2016 -0400 feat: extact clean node integrates most functions commit 322bc6534d30feb7c1c08d3813132badc6286b40 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:46:04 2016 -0400 feat: removing empty nodes as defined in constants commit f1d38932ea12a865814d2326970031fcb8515baa Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:33:31 2016 -0400 feat: cleaning attributes from nodes commit 0aa73ada6854af0ecd504bfe3d926a9524787ab5 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:09:56 2016 -0400 feat: cleaning h1s from text commit 12d4a309246285c278ce7765e4fbaa8271bb5889 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:52:03 2016 -0400 feat: removing spacer images commit 4e74ff830cc67586560f6fc72e2cfa432a3a2647 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:38:49 2016 -0400 feat: stripping unwanted html from doc commit c774166e90169fd0c1aa89898d3f7a975e82bf0a Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:17:32 2016 -0400 feat: removing small images, height attribute from images commit 3a8642f42cda451669c832482c5e1611b1ff2ea9 Author: Adam Pash 
<adam.pash@gmail.com> Date: Mon Aug 29 12:57:45 2016 -0400 feat: rewrite top level commit a1c03e779234b0aea02206d92ec3dcc15758507e Author: Adam Pash <adam.pash@gmail.com> Date: Fri Aug 26 17:34:36 2016 -0400 in a weird place rn
8 years ago
},
// stripHOnes
removeTwoHOnes: {
before: `
<div>
<h1>Look at this!</h1>
<p>What do you think?</p>
<h1>Can you believe it?!</h1>
</div>
`,
after: `
<div>
<p>What do you think?</p>
</div>
`,
feat: implemented extractBestNode functionality Squashed commit of the following: commit 9af554dd975ff1778ed70c71fa9bde667fc5f880 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 15:19:32 2016 -0400 feat: add cleanHeaders commit 0dfea98eedc4f97fcbd78866322595c705e20521 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 14:30:49 2016 -0400 fix: scoring parent nodes recursively commit b6e5897a694adeb81e25a905aba72c0f45a8cc94 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 12:47:24 2016 -0400 feat: extract clean node up and running commit fb652c5db13db6bce7271efd68ba4b20515e9549 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 09:57:21 2016 -0400 chore: added test for p tags with nested tags (e.g., img, iframe) commit 731d0a2e4d89121dfafad195e9d0911805c4f8e4 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 17:50:33 2016 -0400 feat: extact clean node integrates most functions commit 322bc6534d30feb7c1c08d3813132badc6286b40 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:46:04 2016 -0400 feat: removing empty nodes as defined in constants commit f1d38932ea12a865814d2326970031fcb8515baa Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:33:31 2016 -0400 feat: cleaning attributes from nodes commit 0aa73ada6854af0ecd504bfe3d926a9524787ab5 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:09:56 2016 -0400 feat: cleaning h1s from text commit 12d4a309246285c278ce7765e4fbaa8271bb5889 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:52:03 2016 -0400 feat: removing spacer images commit 4e74ff830cc67586560f6fc72e2cfa432a3a2647 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:38:49 2016 -0400 feat: stripping unwanted html from doc commit c774166e90169fd0c1aa89898d3f7a975e82bf0a Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:17:32 2016 -0400 feat: removing small images, height attribute from images commit 3a8642f42cda451669c832482c5e1611b1ff2ea9 Author: Adam Pash 
<adam.pash@gmail.com> Date: Mon Aug 29 12:57:45 2016 -0400 feat: rewrite top level commit a1c03e779234b0aea02206d92ec3dcc15758507e Author: Adam Pash <adam.pash@gmail.com> Date: Fri Aug 26 17:34:36 2016 -0400 in a weird place rn
8 years ago
},
convertThreeHOnes: {
before: `
<div>
<h1>Look at this!</h1>
<p>What do you think?</p>
<h1>Can you believe it?!</h1>
<p>What do you think?</p>
<h1>Can you believe it?!</h1>
</div>
`,
after: `
<div>
<h2>Look at this!</h2>
<p>What do you think?</p>
<h2>Can you believe it?!</h2>
<p>What do you think?</p>
<h2>Can you believe it?!</h2>
</div>
`,
feat: implemented extractBestNode functionality Squashed commit of the following: commit 9af554dd975ff1778ed70c71fa9bde667fc5f880 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 15:19:32 2016 -0400 feat: add cleanHeaders commit 0dfea98eedc4f97fcbd78866322595c705e20521 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 14:30:49 2016 -0400 fix: scoring parent nodes recursively commit b6e5897a694adeb81e25a905aba72c0f45a8cc94 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 12:47:24 2016 -0400 feat: extract clean node up and running commit fb652c5db13db6bce7271efd68ba4b20515e9549 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 09:57:21 2016 -0400 chore: added test for p tags with nested tags (e.g., img, iframe) commit 731d0a2e4d89121dfafad195e9d0911805c4f8e4 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 17:50:33 2016 -0400 feat: extact clean node integrates most functions commit 322bc6534d30feb7c1c08d3813132badc6286b40 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:46:04 2016 -0400 feat: removing empty nodes as defined in constants commit f1d38932ea12a865814d2326970031fcb8515baa Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:33:31 2016 -0400 feat: cleaning attributes from nodes commit 0aa73ada6854af0ecd504bfe3d926a9524787ab5 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:09:56 2016 -0400 feat: cleaning h1s from text commit 12d4a309246285c278ce7765e4fbaa8271bb5889 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:52:03 2016 -0400 feat: removing spacer images commit 4e74ff830cc67586560f6fc72e2cfa432a3a2647 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:38:49 2016 -0400 feat: stripping unwanted html from doc commit c774166e90169fd0c1aa89898d3f7a975e82bf0a Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:17:32 2016 -0400 feat: removing small images, height attribute from images commit 3a8642f42cda451669c832482c5e1611b1ff2ea9 Author: Adam Pash 
<adam.pash@gmail.com> Date: Mon Aug 29 12:57:45 2016 -0400 feat: rewrite top level commit a1c03e779234b0aea02206d92ec3dcc15758507e Author: Adam Pash <adam.pash@gmail.com> Date: Fri Aug 26 17:34:36 2016 -0400 in a weird place rn
8 years ago
},
// cleanAttributes
removeStyle: {
before: `
<div>
<p style="color: red;">What do you think?</p>
</div>
`,
after: `
<div>
<p>What do you think?</p>
</div>
`,
feat: implemented extractBestNode functionality Squashed commit of the following: commit 9af554dd975ff1778ed70c71fa9bde667fc5f880 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 15:19:32 2016 -0400 feat: add cleanHeaders commit 0dfea98eedc4f97fcbd78866322595c705e20521 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 14:30:49 2016 -0400 fix: scoring parent nodes recursively commit b6e5897a694adeb81e25a905aba72c0f45a8cc94 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 12:47:24 2016 -0400 feat: extract clean node up and running commit fb652c5db13db6bce7271efd68ba4b20515e9549 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 09:57:21 2016 -0400 chore: added test for p tags with nested tags (e.g., img, iframe) commit 731d0a2e4d89121dfafad195e9d0911805c4f8e4 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 17:50:33 2016 -0400 feat: extact clean node integrates most functions commit 322bc6534d30feb7c1c08d3813132badc6286b40 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:46:04 2016 -0400 feat: removing empty nodes as defined in constants commit f1d38932ea12a865814d2326970031fcb8515baa Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:33:31 2016 -0400 feat: cleaning attributes from nodes commit 0aa73ada6854af0ecd504bfe3d926a9524787ab5 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:09:56 2016 -0400 feat: cleaning h1s from text commit 12d4a309246285c278ce7765e4fbaa8271bb5889 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:52:03 2016 -0400 feat: removing spacer images commit 4e74ff830cc67586560f6fc72e2cfa432a3a2647 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:38:49 2016 -0400 feat: stripping unwanted html from doc commit c774166e90169fd0c1aa89898d3f7a975e82bf0a Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:17:32 2016 -0400 feat: removing small images, height attribute from images commit 3a8642f42cda451669c832482c5e1611b1ff2ea9 Author: Adam Pash 
<adam.pash@gmail.com> Date: Mon Aug 29 12:57:45 2016 -0400 feat: rewrite top level commit a1c03e779234b0aea02206d92ec3dcc15758507e Author: Adam Pash <adam.pash@gmail.com> Date: Fri Aug 26 17:34:36 2016 -0400 in a weird place rn
8 years ago
},
removeAlign: {
before: `
<div>
<p style="color: red;" align="center">What do you think?</p>
</div>
`,
after: `
<div>
<p>What do you think?</p>
</div>
`,
feat: implemented extractBestNode functionality Squashed commit of the following: commit 9af554dd975ff1778ed70c71fa9bde667fc5f880 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 15:19:32 2016 -0400 feat: add cleanHeaders commit 0dfea98eedc4f97fcbd78866322595c705e20521 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 14:30:49 2016 -0400 fix: scoring parent nodes recursively commit b6e5897a694adeb81e25a905aba72c0f45a8cc94 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 12:47:24 2016 -0400 feat: extract clean node up and running commit fb652c5db13db6bce7271efd68ba4b20515e9549 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 09:57:21 2016 -0400 chore: added test for p tags with nested tags (e.g., img, iframe) commit 731d0a2e4d89121dfafad195e9d0911805c4f8e4 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 17:50:33 2016 -0400 feat: extact clean node integrates most functions commit 322bc6534d30feb7c1c08d3813132badc6286b40 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:46:04 2016 -0400 feat: removing empty nodes as defined in constants commit f1d38932ea12a865814d2326970031fcb8515baa Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:33:31 2016 -0400 feat: cleaning attributes from nodes commit 0aa73ada6854af0ecd504bfe3d926a9524787ab5 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:09:56 2016 -0400 feat: cleaning h1s from text commit 12d4a309246285c278ce7765e4fbaa8271bb5889 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:52:03 2016 -0400 feat: removing spacer images commit 4e74ff830cc67586560f6fc72e2cfa432a3a2647 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:38:49 2016 -0400 feat: stripping unwanted html from doc commit c774166e90169fd0c1aa89898d3f7a975e82bf0a Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:17:32 2016 -0400 feat: removing small images, height attribute from images commit 3a8642f42cda451669c832482c5e1611b1ff2ea9 Author: Adam Pash 
<adam.pash@gmail.com> Date: Mon Aug 29 12:57:45 2016 -0400 feat: rewrite top level commit a1c03e779234b0aea02206d92ec3dcc15758507e Author: Adam Pash <adam.pash@gmail.com> Date: Fri Aug 26 17:34:36 2016 -0400 in a weird place rn
8 years ago
},
// removeEmpty
removeEmptyP: {
before: `
<div>
<p>What do you think?</p>
<p></p>
</div>
`,
after: `
<div>
<p>What do you think?</p>
</div>
`,
feat: implemented extractBestNode functionality Squashed commit of the following: commit 9af554dd975ff1778ed70c71fa9bde667fc5f880 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 15:19:32 2016 -0400 feat: add cleanHeaders commit 0dfea98eedc4f97fcbd78866322595c705e20521 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 14:30:49 2016 -0400 fix: scoring parent nodes recursively commit b6e5897a694adeb81e25a905aba72c0f45a8cc94 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 12:47:24 2016 -0400 feat: extract clean node up and running commit fb652c5db13db6bce7271efd68ba4b20515e9549 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 09:57:21 2016 -0400 chore: added test for p tags with nested tags (e.g., img, iframe) commit 731d0a2e4d89121dfafad195e9d0911805c4f8e4 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 17:50:33 2016 -0400 feat: extact clean node integrates most functions commit 322bc6534d30feb7c1c08d3813132badc6286b40 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:46:04 2016 -0400 feat: removing empty nodes as defined in constants commit f1d38932ea12a865814d2326970031fcb8515baa Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:33:31 2016 -0400 feat: cleaning attributes from nodes commit 0aa73ada6854af0ecd504bfe3d926a9524787ab5 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:09:56 2016 -0400 feat: cleaning h1s from text commit 12d4a309246285c278ce7765e4fbaa8271bb5889 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:52:03 2016 -0400 feat: removing spacer images commit 4e74ff830cc67586560f6fc72e2cfa432a3a2647 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:38:49 2016 -0400 feat: stripping unwanted html from doc commit c774166e90169fd0c1aa89898d3f7a975e82bf0a Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:17:32 2016 -0400 feat: removing small images, height attribute from images commit 3a8642f42cda451669c832482c5e1611b1ff2ea9 Author: Adam Pash 
<adam.pash@gmail.com> Date: Mon Aug 29 12:57:45 2016 -0400 feat: rewrite top level commit a1c03e779234b0aea02206d92ec3dcc15758507e Author: Adam Pash <adam.pash@gmail.com> Date: Fri Aug 26 17:34:36 2016 -0400 in a weird place rn
8 years ago
},
doNotRemoveBr: {
before: `
<div>
<p>What do you think?</p>
<p></p>
<div></div>
<p>What do you think?</p>
</div>
`,
after: `
<div>
<p>What do you think?</p>
<div></div>
<p>What do you think?</p>
</div>
`,
feat: implemented extractBestNode functionality Squashed commit of the following: commit 9af554dd975ff1778ed70c71fa9bde667fc5f880 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 15:19:32 2016 -0400 feat: add cleanHeaders commit 0dfea98eedc4f97fcbd78866322595c705e20521 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 14:30:49 2016 -0400 fix: scoring parent nodes recursively commit b6e5897a694adeb81e25a905aba72c0f45a8cc94 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 12:47:24 2016 -0400 feat: extract clean node up and running commit fb652c5db13db6bce7271efd68ba4b20515e9549 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 09:57:21 2016 -0400 chore: added test for p tags with nested tags (e.g., img, iframe) commit 731d0a2e4d89121dfafad195e9d0911805c4f8e4 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 17:50:33 2016 -0400 feat: extact clean node integrates most functions commit 322bc6534d30feb7c1c08d3813132badc6286b40 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:46:04 2016 -0400 feat: removing empty nodes as defined in constants commit f1d38932ea12a865814d2326970031fcb8515baa Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:33:31 2016 -0400 feat: cleaning attributes from nodes commit 0aa73ada6854af0ecd504bfe3d926a9524787ab5 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:09:56 2016 -0400 feat: cleaning h1s from text commit 12d4a309246285c278ce7765e4fbaa8271bb5889 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:52:03 2016 -0400 feat: removing spacer images commit 4e74ff830cc67586560f6fc72e2cfa432a3a2647 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:38:49 2016 -0400 feat: stripping unwanted html from doc commit c774166e90169fd0c1aa89898d3f7a975e82bf0a Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:17:32 2016 -0400 feat: removing small images, height attribute from images commit 3a8642f42cda451669c832482c5e1611b1ff2ea9 Author: Adam Pash 
<adam.pash@gmail.com> Date: Mon Aug 29 12:57:45 2016 -0400 feat: rewrite top level commit a1c03e779234b0aea02206d92ec3dcc15758507e Author: Adam Pash <adam.pash@gmail.com> Date: Fri Aug 26 17:34:36 2016 -0400 in a weird place rn
8 years ago
},
doNotNested: {
before: `
<div>
<p>What do you think?</p>
<p><img src="foo/bar.jpg" /></p>
<p><iframe src="foo/bar.jpg" /></p>
<p>What do you think?</p>
</div>
`,
after: `
<div>
<p>What do you think?</p>
<p><img src="foo/bar.jpg" /></p>
<p>What do you think?</p>
</div>
`,
feat: implemented extractBestNode functionality Squashed commit of the following: commit 9af554dd975ff1778ed70c71fa9bde667fc5f880 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 15:19:32 2016 -0400 feat: add cleanHeaders commit 0dfea98eedc4f97fcbd78866322595c705e20521 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 14:30:49 2016 -0400 fix: scoring parent nodes recursively commit b6e5897a694adeb81e25a905aba72c0f45a8cc94 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 12:47:24 2016 -0400 feat: extract clean node up and running commit fb652c5db13db6bce7271efd68ba4b20515e9549 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 09:57:21 2016 -0400 chore: added test for p tags with nested tags (e.g., img, iframe) commit 731d0a2e4d89121dfafad195e9d0911805c4f8e4 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 17:50:33 2016 -0400 feat: extact clean node integrates most functions commit 322bc6534d30feb7c1c08d3813132badc6286b40 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:46:04 2016 -0400 feat: removing empty nodes as defined in constants commit f1d38932ea12a865814d2326970031fcb8515baa Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:33:31 2016 -0400 feat: cleaning attributes from nodes commit 0aa73ada6854af0ecd504bfe3d926a9524787ab5 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:09:56 2016 -0400 feat: cleaning h1s from text commit 12d4a309246285c278ce7765e4fbaa8271bb5889 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:52:03 2016 -0400 feat: removing spacer images commit 4e74ff830cc67586560f6fc72e2cfa432a3a2647 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:38:49 2016 -0400 feat: stripping unwanted html from doc commit c774166e90169fd0c1aa89898d3f7a975e82bf0a Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:17:32 2016 -0400 feat: removing small images, height attribute from images commit 3a8642f42cda451669c832482c5e1611b1ff2ea9 Author: Adam Pash 
<adam.pash@gmail.com> Date: Mon Aug 29 12:57:45 2016 -0400 feat: rewrite top level commit a1c03e779234b0aea02206d92ec3dcc15758507e Author: Adam Pash <adam.pash@gmail.com> Date: Fri Aug 26 17:34:36 2016 -0400 in a weird place rn
8 years ago
},
// cleanConditionally
dropNegativeScore: {
before: `
<div>
<p>What do you think?</p>
<p>
<ul score="-10">
<li>Foo</li>
<li>Bar</li>
</ul>
</p>
<p>What do you think?</p>
</div>
`,
after: `
<div>
<p>What do you think?</p>
<p>
</p>
<p>What do you think?</p>
</div>
`,
feat: implemented extractBestNode functionality Squashed commit of the following: commit 9af554dd975ff1778ed70c71fa9bde667fc5f880 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 15:19:32 2016 -0400 feat: add cleanHeaders commit 0dfea98eedc4f97fcbd78866322595c705e20521 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 14:30:49 2016 -0400 fix: scoring parent nodes recursively commit b6e5897a694adeb81e25a905aba72c0f45a8cc94 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 12:47:24 2016 -0400 feat: extract clean node up and running commit fb652c5db13db6bce7271efd68ba4b20515e9549 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 09:57:21 2016 -0400 chore: added test for p tags with nested tags (e.g., img, iframe) commit 731d0a2e4d89121dfafad195e9d0911805c4f8e4 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 17:50:33 2016 -0400 feat: extact clean node integrates most functions commit 322bc6534d30feb7c1c08d3813132badc6286b40 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:46:04 2016 -0400 feat: removing empty nodes as defined in constants commit f1d38932ea12a865814d2326970031fcb8515baa Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:33:31 2016 -0400 feat: cleaning attributes from nodes commit 0aa73ada6854af0ecd504bfe3d926a9524787ab5 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:09:56 2016 -0400 feat: cleaning h1s from text commit 12d4a309246285c278ce7765e4fbaa8271bb5889 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:52:03 2016 -0400 feat: removing spacer images commit 4e74ff830cc67586560f6fc72e2cfa432a3a2647 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:38:49 2016 -0400 feat: stripping unwanted html from doc commit c774166e90169fd0c1aa89898d3f7a975e82bf0a Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:17:32 2016 -0400 feat: removing small images, height attribute from images commit 3a8642f42cda451669c832482c5e1611b1ff2ea9 Author: Adam Pash 
<adam.pash@gmail.com> Date: Mon Aug 29 12:57:45 2016 -0400 feat: rewrite top level commit a1c03e779234b0aea02206d92ec3dcc15758507e Author: Adam Pash <adam.pash@gmail.com> Date: Fri Aug 26 17:34:36 2016 -0400 in a weird place rn
8 years ago
},
removeTooManyInputs: {
before: `
<div>
<p>What do you think?</p>
<p>What do you think?</p>
<p>What do you think?</p>
<p>What do you think?</p>
<p>What do you think?</p>
<p>What do you think?</p>
<p>What do you think?</p>
<div>
<p>What is your name?</p>
<input type="text"></input>
<p>What is your name?</p>
<input type="text"></input>
<p>What is your name?</p>
<input type="text"></input>
</div>
<p>What do you think?</p>
</div>
`,
after: `
<div>
<p>What do you think?</p>
<p>What do you think?</p>
<p>What do you think?</p>
<p>What do you think?</p>
<p>What do you think?</p>
<p>What do you think?</p>
<p>What do you think?</p>
<p>What do you think?</p>
</div>
`,
feat: implemented extractBestNode functionality Squashed commit of the following: commit 9af554dd975ff1778ed70c71fa9bde667fc5f880 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 15:19:32 2016 -0400 feat: add cleanHeaders commit 0dfea98eedc4f97fcbd78866322595c705e20521 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 14:30:49 2016 -0400 fix: scoring parent nodes recursively commit b6e5897a694adeb81e25a905aba72c0f45a8cc94 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 12:47:24 2016 -0400 feat: extract clean node up and running commit fb652c5db13db6bce7271efd68ba4b20515e9549 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 09:57:21 2016 -0400 chore: added test for p tags with nested tags (e.g., img, iframe) commit 731d0a2e4d89121dfafad195e9d0911805c4f8e4 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 17:50:33 2016 -0400 feat: extact clean node integrates most functions commit 322bc6534d30feb7c1c08d3813132badc6286b40 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:46:04 2016 -0400 feat: removing empty nodes as defined in constants commit f1d38932ea12a865814d2326970031fcb8515baa Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:33:31 2016 -0400 feat: cleaning attributes from nodes commit 0aa73ada6854af0ecd504bfe3d926a9524787ab5 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:09:56 2016 -0400 feat: cleaning h1s from text commit 12d4a309246285c278ce7765e4fbaa8271bb5889 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:52:03 2016 -0400 feat: removing spacer images commit 4e74ff830cc67586560f6fc72e2cfa432a3a2647 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:38:49 2016 -0400 feat: stripping unwanted html from doc commit c774166e90169fd0c1aa89898d3f7a975e82bf0a Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:17:32 2016 -0400 feat: removing small images, height attribute from images commit 3a8642f42cda451669c832482c5e1611b1ff2ea9 Author: Adam Pash 
<adam.pash@gmail.com> Date: Mon Aug 29 12:57:45 2016 -0400 feat: rewrite top level commit a1c03e779234b0aea02206d92ec3dcc15758507e Author: Adam Pash <adam.pash@gmail.com> Date: Fri Aug 26 17:34:36 2016 -0400 in a weird place rn
8 years ago
},
removeShortNoImg: {
before: `
<div>
<p>What do you think?</p>
<div>
<p>Keep this one</p>
<img src="asdf" />
</div>
<div>
<p>Lose this one</p>
</div>
</div>
`,
after: `
<div>
<p>What do you think?</p>
<div>
<p>Keep this one</p>
<img src="asdf">
</div>
</div>
`,
feat: implemented extractBestNode functionality Squashed commit of the following: commit 9af554dd975ff1778ed70c71fa9bde667fc5f880 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 15:19:32 2016 -0400 feat: add cleanHeaders commit 0dfea98eedc4f97fcbd78866322595c705e20521 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 14:30:49 2016 -0400 fix: scoring parent nodes recursively commit b6e5897a694adeb81e25a905aba72c0f45a8cc94 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 12:47:24 2016 -0400 feat: extract clean node up and running commit fb652c5db13db6bce7271efd68ba4b20515e9549 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 09:57:21 2016 -0400 chore: added test for p tags with nested tags (e.g., img, iframe) commit 731d0a2e4d89121dfafad195e9d0911805c4f8e4 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 17:50:33 2016 -0400 feat: extact clean node integrates most functions commit 322bc6534d30feb7c1c08d3813132badc6286b40 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:46:04 2016 -0400 feat: removing empty nodes as defined in constants commit f1d38932ea12a865814d2326970031fcb8515baa Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:33:31 2016 -0400 feat: cleaning attributes from nodes commit 0aa73ada6854af0ecd504bfe3d926a9524787ab5 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:09:56 2016 -0400 feat: cleaning h1s from text commit 12d4a309246285c278ce7765e4fbaa8271bb5889 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:52:03 2016 -0400 feat: removing spacer images commit 4e74ff830cc67586560f6fc72e2cfa432a3a2647 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:38:49 2016 -0400 feat: stripping unwanted html from doc commit c774166e90169fd0c1aa89898d3f7a975e82bf0a Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:17:32 2016 -0400 feat: removing small images, height attribute from images commit 3a8642f42cda451669c832482c5e1611b1ff2ea9 Author: Adam Pash 
<adam.pash@gmail.com> Date: Mon Aug 29 12:57:45 2016 -0400 feat: rewrite top level commit a1c03e779234b0aea02206d92ec3dcc15758507e Author: Adam Pash <adam.pash@gmail.com> Date: Fri Aug 26 17:34:36 2016 -0400 in a weird place rn
8 years ago
},
linkDensityHigh: {
before: `
<div score="0">
<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu.</p>
<ul>
<li>Keep this one</li>
<li>Keep this one</li>
<li>Keep this one</li>
<li>Keep this one</li>
<li>Keep this one</li>
<li>Keep this one</li>
<li>Keep this one</li>
</ul>
<ul score="20">
<li><a href="#">Lose this one</a></li>
<li><a href="#">Lose this one</a></li>
<li><a href="#">Lose this one</a></li>
<li><a href="#">Lose this one</a></li>
</ul>
</div>
`,
after: `
<div>
<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu.</p>
<ul>
<li>Keep this one</li>
<li>Keep this one</li>
<li>Keep this one</li>
<li>Keep this one</li>
<li>Keep this one</li>
<li>Keep this one</li>
<li>Keep this one</li>
</ul>
</div>
`,
feat: implemented extractBestNode functionality Squashed commit of the following: commit 9af554dd975ff1778ed70c71fa9bde667fc5f880 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 15:19:32 2016 -0400 feat: add cleanHeaders commit 0dfea98eedc4f97fcbd78866322595c705e20521 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 14:30:49 2016 -0400 fix: scoring parent nodes recursively commit b6e5897a694adeb81e25a905aba72c0f45a8cc94 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 12:47:24 2016 -0400 feat: extract clean node up and running commit fb652c5db13db6bce7271efd68ba4b20515e9549 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 09:57:21 2016 -0400 chore: added test for p tags with nested tags (e.g., img, iframe) commit 731d0a2e4d89121dfafad195e9d0911805c4f8e4 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 17:50:33 2016 -0400 feat: extact clean node integrates most functions commit 322bc6534d30feb7c1c08d3813132badc6286b40 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:46:04 2016 -0400 feat: removing empty nodes as defined in constants commit f1d38932ea12a865814d2326970031fcb8515baa Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:33:31 2016 -0400 feat: cleaning attributes from nodes commit 0aa73ada6854af0ecd504bfe3d926a9524787ab5 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:09:56 2016 -0400 feat: cleaning h1s from text commit 12d4a309246285c278ce7765e4fbaa8271bb5889 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:52:03 2016 -0400 feat: removing spacer images commit 4e74ff830cc67586560f6fc72e2cfa432a3a2647 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:38:49 2016 -0400 feat: stripping unwanted html from doc commit c774166e90169fd0c1aa89898d3f7a975e82bf0a Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:17:32 2016 -0400 feat: removing small images, height attribute from images commit 3a8642f42cda451669c832482c5e1611b1ff2ea9 Author: Adam Pash 
<adam.pash@gmail.com> Date: Mon Aug 29 12:57:45 2016 -0400 feat: rewrite top level commit a1c03e779234b0aea02206d92ec3dcc15758507e Author: Adam Pash <adam.pash@gmail.com> Date: Fri Aug 26 17:34:36 2016 -0400 in a weird place rn
8 years ago
},
goodScoreTooDense: {
before: `
<div>
<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu.</p>
<ul>
<li>Keep this one</li>
<li>Keep this one</li>
<li>Keep this one</li>
<li>Keep this one</li>
<li>Keep this one</li>
<li>Keep this one</li>
<li>Keep this one</li>
</ul>
<ul score="30">
<li><a href="#">Lose this one</a></li>
<li><a href="#">Lose this one</a></li>
<li><a href="#">Lose this one</a></li>
<li><a href="#">Lose this one</a></li>
<li><a href="#">Lose this one</a></li>
<li><a href="#">Lose this one</a></li>
<li><a href="#">Lose this one</a></li>
<li><a href="#">Lose this one</a></li>
</ul>
</div>
`,
after: `
<div>
<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu.</p>
<ul>
<li>Keep this one</li>
<li>Keep this one</li>
<li>Keep this one</li>
<li>Keep this one</li>
<li>Keep this one</li>
<li>Keep this one</li>
<li>Keep this one</li>
</ul>
</div>
`,
feat: implemented extractBestNode functionality Squashed commit of the following: commit 9af554dd975ff1778ed70c71fa9bde667fc5f880 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 15:19:32 2016 -0400 feat: add cleanHeaders commit 0dfea98eedc4f97fcbd78866322595c705e20521 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 14:30:49 2016 -0400 fix: scoring parent nodes recursively commit b6e5897a694adeb81e25a905aba72c0f45a8cc94 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 12:47:24 2016 -0400 feat: extract clean node up and running commit fb652c5db13db6bce7271efd68ba4b20515e9549 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 09:57:21 2016 -0400 chore: added test for p tags with nested tags (e.g., img, iframe) commit 731d0a2e4d89121dfafad195e9d0911805c4f8e4 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 17:50:33 2016 -0400 feat: extact clean node integrates most functions commit 322bc6534d30feb7c1c08d3813132badc6286b40 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:46:04 2016 -0400 feat: removing empty nodes as defined in constants commit f1d38932ea12a865814d2326970031fcb8515baa Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:33:31 2016 -0400 feat: cleaning attributes from nodes commit 0aa73ada6854af0ecd504bfe3d926a9524787ab5 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:09:56 2016 -0400 feat: cleaning h1s from text commit 12d4a309246285c278ce7765e4fbaa8271bb5889 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:52:03 2016 -0400 feat: removing spacer images commit 4e74ff830cc67586560f6fc72e2cfa432a3a2647 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:38:49 2016 -0400 feat: stripping unwanted html from doc commit c774166e90169fd0c1aa89898d3f7a975e82bf0a Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:17:32 2016 -0400 feat: removing small images, height attribute from images commit 3a8642f42cda451669c832482c5e1611b1ff2ea9 Author: Adam Pash 
<adam.pash@gmail.com> Date: Mon Aug 29 12:57:45 2016 -0400 feat: rewrite top level commit a1c03e779234b0aea02206d92ec3dcc15758507e Author: Adam Pash <adam.pash@gmail.com> Date: Fri Aug 26 17:34:36 2016 -0400 in a weird place rn
8 years ago
},
// Fixture with only a `before` document (no `after` is defined): a long
// paragraph, then a paragraph whose text ends in a colon plus a trailing space,
// immediately followed by a <ul score="30"> of eight link-only items.
// NOTE(review): the wrapper uses weight="40" where sibling fixtures use a
// `score` attribute — confirm this is intentional.
previousEndsInColon: {
before: `
<div weight="40">
<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu.</p>
<p>Now read these links: </p>
<ul score="30">
<li><a href="#">Lose this one</a></li>
<li><a href="#">Lose this one</a></li>
<li><a href="#">Lose this one</a></li>
<li><a href="#">Lose this one</a></li>
<li><a href="#">Lose this one</a></li>
<li><a href="#">Lose this one</a></li>
<li><a href="#">Lose this one</a></li>
<li><a href="#">Lose this one</a></li>
</ul>
</div>
`,
},
// Fixture with only a `before` document (no `after` is defined): a
// <div score="100"> holding a long paragraph and a link-only
// <ul score="20" class="entry-content-asset"> of four items.
cleanEntryContentAsset: {
before: `
<div score="100">
<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu.</p>
<ul score="20" class="entry-content-asset">
<li><a href="#">Lose this one</a></li>
<li><a href="#">Lose this one</a></li>
<li><a href="#">Lose this one</a></li>
<li><a href="#">Lose this one</a></li>
</ul>
</div>
`,
},
// normalizeSpaces
normalizeSpaces: {
before: `
<div>
<p>What do you think?</p>
</div>
`,
after: 'What do you think?',
feat: implemented extractBestNode functionality Squashed commit of the following: commit 9af554dd975ff1778ed70c71fa9bde667fc5f880 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 15:19:32 2016 -0400 feat: add cleanHeaders commit 0dfea98eedc4f97fcbd78866322595c705e20521 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 14:30:49 2016 -0400 fix: scoring parent nodes recursively commit b6e5897a694adeb81e25a905aba72c0f45a8cc94 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 12:47:24 2016 -0400 feat: extract clean node up and running commit fb652c5db13db6bce7271efd68ba4b20515e9549 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 09:57:21 2016 -0400 chore: added test for p tags with nested tags (e.g., img, iframe) commit 731d0a2e4d89121dfafad195e9d0911805c4f8e4 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 17:50:33 2016 -0400 feat: extact clean node integrates most functions commit 322bc6534d30feb7c1c08d3813132badc6286b40 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:46:04 2016 -0400 feat: removing empty nodes as defined in constants commit f1d38932ea12a865814d2326970031fcb8515baa Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:33:31 2016 -0400 feat: cleaning attributes from nodes commit 0aa73ada6854af0ecd504bfe3d926a9524787ab5 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:09:56 2016 -0400 feat: cleaning h1s from text commit 12d4a309246285c278ce7765e4fbaa8271bb5889 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:52:03 2016 -0400 feat: removing spacer images commit 4e74ff830cc67586560f6fc72e2cfa432a3a2647 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:38:49 2016 -0400 feat: stripping unwanted html from doc commit c774166e90169fd0c1aa89898d3f7a975e82bf0a Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:17:32 2016 -0400 feat: removing small images, height attribute from images commit 3a8642f42cda451669c832482c5e1611b1ff2ea9 Author: Adam Pash 
<adam.pash@gmail.com> Date: Mon Aug 29 12:57:45 2016 -0400 feat: rewrite top level commit a1c03e779234b0aea02206d92ec3dcc15758507e Author: Adam Pash <adam.pash@gmail.com> Date: Fri Aug 26 17:34:36 2016 -0400 in a weird place rn
8 years ago
},
// cleanHeaders
cleanFirstHeds: {
before: `
<div>
<h2>Lose me</h2>
<p>What do you think?</p>
<h2>Keep me</h2>
<p>What do you think?</p>
</div>
`,
after: `
<div>
<p>What do you think?</p>
<h2>Keep me</h2>
<p>What do you think?</p>
</div>
`,
feat: implemented extractBestNode functionality Squashed commit of the following: commit 9af554dd975ff1778ed70c71fa9bde667fc5f880 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 15:19:32 2016 -0400 feat: add cleanHeaders commit 0dfea98eedc4f97fcbd78866322595c705e20521 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 14:30:49 2016 -0400 fix: scoring parent nodes recursively commit b6e5897a694adeb81e25a905aba72c0f45a8cc94 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 12:47:24 2016 -0400 feat: extract clean node up and running commit fb652c5db13db6bce7271efd68ba4b20515e9549 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 09:57:21 2016 -0400 chore: added test for p tags with nested tags (e.g., img, iframe) commit 731d0a2e4d89121dfafad195e9d0911805c4f8e4 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 17:50:33 2016 -0400 feat: extact clean node integrates most functions commit 322bc6534d30feb7c1c08d3813132badc6286b40 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:46:04 2016 -0400 feat: removing empty nodes as defined in constants commit f1d38932ea12a865814d2326970031fcb8515baa Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:33:31 2016 -0400 feat: cleaning attributes from nodes commit 0aa73ada6854af0ecd504bfe3d926a9524787ab5 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:09:56 2016 -0400 feat: cleaning h1s from text commit 12d4a309246285c278ce7765e4fbaa8271bb5889 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:52:03 2016 -0400 feat: removing spacer images commit 4e74ff830cc67586560f6fc72e2cfa432a3a2647 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:38:49 2016 -0400 feat: stripping unwanted html from doc commit c774166e90169fd0c1aa89898d3f7a975e82bf0a Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:17:32 2016 -0400 feat: removing small images, height attribute from images commit 3a8642f42cda451669c832482c5e1611b1ff2ea9 Author: Adam Pash 
<adam.pash@gmail.com> Date: Mon Aug 29 12:57:45 2016 -0400 feat: rewrite top level commit a1c03e779234b0aea02206d92ec3dcc15758507e Author: Adam Pash <adam.pash@gmail.com> Date: Fri Aug 26 17:34:36 2016 -0400 in a weird place rn
8 years ago
},
cleanTitleMatch: {
before: `
<div>
<p>What do you think?</p>
<h2>Title Match</h2>
<p>What do you think?</p>
</div>
`,
after: `
<div>
<p>What do you think?</p>
<p>What do you think?</p>
</div>
`,
feat: implemented extractBestNode functionality Squashed commit of the following: commit 9af554dd975ff1778ed70c71fa9bde667fc5f880 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 15:19:32 2016 -0400 feat: add cleanHeaders commit 0dfea98eedc4f97fcbd78866322595c705e20521 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 14:30:49 2016 -0400 fix: scoring parent nodes recursively commit b6e5897a694adeb81e25a905aba72c0f45a8cc94 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 12:47:24 2016 -0400 feat: extract clean node up and running commit fb652c5db13db6bce7271efd68ba4b20515e9549 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 09:57:21 2016 -0400 chore: added test for p tags with nested tags (e.g., img, iframe) commit 731d0a2e4d89121dfafad195e9d0911805c4f8e4 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 17:50:33 2016 -0400 feat: extact clean node integrates most functions commit 322bc6534d30feb7c1c08d3813132badc6286b40 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:46:04 2016 -0400 feat: removing empty nodes as defined in constants commit f1d38932ea12a865814d2326970031fcb8515baa Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:33:31 2016 -0400 feat: cleaning attributes from nodes commit 0aa73ada6854af0ecd504bfe3d926a9524787ab5 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:09:56 2016 -0400 feat: cleaning h1s from text commit 12d4a309246285c278ce7765e4fbaa8271bb5889 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:52:03 2016 -0400 feat: removing spacer images commit 4e74ff830cc67586560f6fc72e2cfa432a3a2647 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:38:49 2016 -0400 feat: stripping unwanted html from doc commit c774166e90169fd0c1aa89898d3f7a975e82bf0a Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:17:32 2016 -0400 feat: removing small images, height attribute from images commit 3a8642f42cda451669c832482c5e1611b1ff2ea9 Author: Adam Pash 
<adam.pash@gmail.com> Date: Mon Aug 29 12:57:45 2016 -0400 feat: rewrite top level commit a1c03e779234b0aea02206d92ec3dcc15758507e Author: Adam Pash <adam.pash@gmail.com> Date: Fri Aug 26 17:34:36 2016 -0400 in a weird place rn
8 years ago
},
dropWithNegativeWeight: {
before: `
<div>
<p>What do you think?</p>
<h2 class="advert">Bad Class, Bad Weight</h2>
<p>What do you think?</p>
</div>
`,
after: `
<div>
<p>What do you think?</p>
<p>What do you think?</p>
</div>
`,
feat: implemented extractBestNode functionality Squashed commit of the following: commit 9af554dd975ff1778ed70c71fa9bde667fc5f880 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 15:19:32 2016 -0400 feat: add cleanHeaders commit 0dfea98eedc4f97fcbd78866322595c705e20521 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 14:30:49 2016 -0400 fix: scoring parent nodes recursively commit b6e5897a694adeb81e25a905aba72c0f45a8cc94 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 12:47:24 2016 -0400 feat: extract clean node up and running commit fb652c5db13db6bce7271efd68ba4b20515e9549 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 09:57:21 2016 -0400 chore: added test for p tags with nested tags (e.g., img, iframe) commit 731d0a2e4d89121dfafad195e9d0911805c4f8e4 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 17:50:33 2016 -0400 feat: extact clean node integrates most functions commit 322bc6534d30feb7c1c08d3813132badc6286b40 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:46:04 2016 -0400 feat: removing empty nodes as defined in constants commit f1d38932ea12a865814d2326970031fcb8515baa Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:33:31 2016 -0400 feat: cleaning attributes from nodes commit 0aa73ada6854af0ecd504bfe3d926a9524787ab5 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:09:56 2016 -0400 feat: cleaning h1s from text commit 12d4a309246285c278ce7765e4fbaa8271bb5889 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:52:03 2016 -0400 feat: removing spacer images commit 4e74ff830cc67586560f6fc72e2cfa432a3a2647 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:38:49 2016 -0400 feat: stripping unwanted html from doc commit c774166e90169fd0c1aa89898d3f7a975e82bf0a Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:17:32 2016 -0400 feat: removing small images, height attribute from images commit 3a8642f42cda451669c832482c5e1611b1ff2ea9 Author: Adam Pash 
<adam.pash@gmail.com> Date: Mon Aug 29 12:57:45 2016 -0400 feat: rewrite top level commit a1c03e779234b0aea02206d92ec3dcc15758507e Author: Adam Pash <adam.pash@gmail.com> Date: Fri Aug 26 17:34:36 2016 -0400 in a weird place rn
8 years ago
},
};
// Expose the HTML fixture map as this module's default export.
export default HTML;