pull/44/merge
Anders Norås 9 years ago
commit 2abc32d3d2

1
.gitignore vendored

@ -1,3 +1,4 @@
.idea
*.pyc
*.egg-info
build

@ -474,7 +474,7 @@ class Document:
#if el.tag == 'div' and counts["img"] >= 1:
# continue
if counts["p"] and counts["img"] > counts["p"]:
if counts["p"] and counts["img"] > counts["p"] * 2:
reason = "too many images (%s)" % counts["img"]
to_remove = True
elif counts["li"] > counts["p"] and tag != "ul" and tag != "ol":

@ -193,19 +193,19 @@ if (cnnPage.isHomepage) {
*/
//--></script>
</dl></a></li>
<li class="cnnItem1"><dl><script type="text/javascript">
var min=1;
var max=2;
x = Math.floor(Math.random() * (max - min + 1)) + min;
/*turning off 50/50 for now*/
/*if(x/2 == 1) {
document.write('<dt><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002346.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe2"><img src="http://i.cdn.turner.com/si/.element/img/4.1/global/cm/button_subscribe_si_red.png" alt="Subscribe to SI" title="Subscribe to SI"/></a></dt>');
document.write('<dd><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002346.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe4"><img src="http://i.cdn.turner.com/si/.element/img/4.1/global/cm/dropdown_subscribe_si_red.png" alt="Subscribe to SI" title="Subscribe to SI"/></a></dd>');
} else {*/
document.write('<dt><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002346.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe2"><img src="http://i.cdn.turner.com/si/.element/img/4.1/global/cm/button_subscribe_si_red.png" alt="Subscribe to SI" title="Subscribe to SI"/></a></dt>');
document.write('<dd style="margin-left:-79px"><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002346.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe4"><img src="http://i.cdn.turner.com/si/2012_images/cm/bn_2osi16579_290x162_v1.png" alt="Subscribe to SI" title="Subscribe to SI"/></a></dd>');
//}
</script>
<li class="cnnItem1"><dl><script type="text/javascript">
var min=1;
var max=2;
x = Math.floor(Math.random() * (max - min + 1)) + min;
/*turning off 50/50 for now*/
/*if(x/2 == 1) {
document.write('<dt><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002346.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe2"><img src="http://i.cdn.turner.com/si/.element/img/4.1/global/cm/button_subscribe_si_red.png" alt="Subscribe to SI" title="Subscribe to SI"/></a></dt>');
document.write('<dd><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002346.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe4"><img src="http://i.cdn.turner.com/si/.element/img/4.1/global/cm/dropdown_subscribe_si_red.png" alt="Subscribe to SI" title="Subscribe to SI"/></a></dd>');
} else {*/
document.write('<dt><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002346.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe2"><img src="http://i.cdn.turner.com/si/.element/img/4.1/global/cm/button_subscribe_si_red.png" alt="Subscribe to SI" title="Subscribe to SI"/></a></dt>');
document.write('<dd style="margin-left:-79px"><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002346.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe4"><img src="http://i.cdn.turner.com/si/2012_images/cm/bn_2osi16579_290x162_v1.png" alt="Subscribe to SI" title="Subscribe to SI"/></a></dd>');
//}
</script>
</dl></li>
<li class="cnnItem2"><dl><!--Default ROS
<a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1001406.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe3"><img src="http://i.cdn.turner.com/si/2012_images/cm/si-btn3_170x30_sigift.png" alt="Give the Gift of SI" title="Give the Gift of SI"/></a>
@ -586,8 +586,8 @@ DIV.cnnTopnav LI.cnnFirst { padding-left:0px; }
<!-- end content -->
<!-- start contentFooter -->
<div class="cnnWideSL"><script type="text/javascript">adsonar_placementId=1488671;adsonar_pid=769769;adsonar_ps=-1;adsonar_zw=978;adsonar_zh=150;</script><script>cnnad_createSL();</script></div>
<!-- start footerbox -->
<div class="cnnWideSL"><script type="text/javascript">adsonar_placementId=1488671;adsonar_pid=769769;adsonar_ps=-1;adsonar_zw=978;adsonar_zh=150;</script><script>cnnad_createSL();</script></div>
<!-- start footerbox -->
<div class="cnnFooterBox">
<div class="cnnHolder">
<div class="cnnRight">
@ -630,17 +630,17 @@ DIV.cnnTopnav LI.cnnFirst { padding-left:0px; }
</div>
</div>
</div>
<!-- end footerbox -->
<!-- start searchbar -->
<div class="cnnSearchFooter">
<div class="cnnCenter"><form method="get" action="http://sportsillustrated.cnn.com/search/" name="footer_search"><input id="searchInputFooter" type="text" name="text" class="cnnLeft"/><input type="image" src="http://i.cdn.turner.com/si/.element/img/4.1/global/search.gif" alt="Search" title="Search" class="cnnRight"/></form></div>
</div>
<!-- end searchbar -->
<!--START OF PAGELINKS.JS-->
<!-- end footerbox -->
<!-- start searchbar -->
<div class="cnnSearchFooter">
<div class="cnnCenter"><form method="get" action="http://sportsillustrated.cnn.com/search/" name="footer_search"><input id="searchInputFooter" type="text" name="text" class="cnnLeft"/><input type="image" src="http://i.cdn.turner.com/si/.element/img/4.1/global/search.gif" alt="Search" title="Search" class="cnnRight"/></form></div>
</div>
<!-- end searchbar -->
<!--START OF PAGELINKS.JS-->
<script language="Javascript">// Post Processing code to update links with tracking references
var url = window.location.href.toString();
@ -692,9 +692,9 @@ if (cnnPage.isHomepage) {
}
/* Poll frame height issue */
if ($e('cnnPollFrame')) { $e('cnnPollFrame').setAttribute('height','169'); }
}</script>
<!--END OF PAGELINKS.JS-->
}</script>
<!--END OF PAGELINKS.JS-->
</div>
<div><!-- move tracking out of cnnpage -->
<!-- ADBP/JSMD -->
@ -753,7 +753,7 @@ _qoptions={
<!-- /TIIAD -->
<script src="http://i.cdn.turner.com/si/.e1d/js/4.1/global/pagelinks.js" type="text/javascript"></script>
<script src="http://i.cdn.turner.com/si/.e1d/js/4.1/global/subnav.js" type="text/javascript"></script>
<script src="http://i.cdn.turner.com/si/.e1d/js/4.1/global/subnav.js" type="text/javascript"></script>
<!-- end contentFooter -->

File diff suppressed because one or more lines are too long

@ -37,3 +37,9 @@ class TestArticleOnly(unittest.TestCase):
res = doc.summary(html_partial=True)
self.assertEqual('<div><div class="', res[0:17])
# Checks the html_partial summary of the 'too-many-images.sample.html' fixture:
# the first 26 characters of the result must be '<div><div class="post-body'.
# NOTE(review): presumably guards the "too many images" removal heuristic so
# that the post body of this sample is kept in the summary — confirm against
# Document.
def test_too_many_images_sample_html_partial(self):
sample = load_sample('too-many-images.sample.html')
doc = Document(sample)
res = doc.summary(html_partial=True)
# Only the opening markup is compared, not the whole summary.
self.assertEqual('<div><div class="post-body', res[0:26])

Loading…
Cancel
Save