Merge pull request #15 from mitechie/master

New option document_only of Document.summary(), fixed issue GH-13 with "<body/>", added some docs, tests, and code quality improvements. Thanks, Rick!
pull/17/merge
Yuri Baburov 12 years ago
commit a1ae4eaf72

@ -33,3 +33,12 @@ Usage::
Command-line usage::
python -m readability.readability -u http://pypi.python.org/pypi/readability-lxml
Document() kwarg options:
- attributes:
- debug: output debug messages
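- min_text_length: paragraphs with less text than this are not scored (default: 25)
- retry_length: if the cleaned summary is shorter than this, parsing is retried once in non-ruthless mode (default: 250)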
- url: will allow adjusting links to be absolute

@ -1,14 +1,25 @@
#!/usr/bin/env python
from cleaners import html_cleaner, clean_attributes
from collections import defaultdict
from htmls import build_doc, get_body, get_title, shorten_title
from lxml.etree import tostring, tounicode
from lxml.html import fragment_fromstring, document_fromstring
import logging
import re
import sys
from collections import defaultdict
from lxml.etree import tostring
from lxml.etree import tounicode
from lxml.html import document_fromstring
from lxml.html import fragment_fromstring
from cleaners import clean_attributes
from cleaners import html_cleaner
from htmls import build_doc
from htmls import get_body
from htmls import get_title
from htmls import shorten_title
logging.basicConfig(level=logging.INFO)
log = logging.getLogger()
REGEXES = {
'unlikelyCandidatesRe': re.compile('combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter', re.I),
@ -25,11 +36,17 @@ REGEXES = {
#skipFootnoteLink: /^\s*(\[?[a-z0-9]{1,2}\]?|^|edit|citation needed)\s*$/i,
}
class Unparseable(ValueError):
    """Raised when a summary cannot be extracted from the input."""
def describe(node, depth=1):
if not hasattr(node, 'tag'):
return "[%s]" % type(node)
name = node.tag
if node.get('id', ''): name += '#'+node.get('id')
if node.get('id', ''):
name += '#' + node.get('id')
if node.get('class', ''):
name += '.' + node.get('class').replace(' ', '.')
if name[:4] in ['div#', 'div.']:
@ -38,8 +55,10 @@ def describe(node, depth=1):
return name + ' - ' + describe(node.getparent(), depth - 1)
return name
def to_int(x):
if not x: return None
if not x:
return None
x = x.strip()
if x.endswith('px'):
return int(x[:-2])
@ -47,26 +66,37 @@ def to_int(x):
return int(x[:-2]) * 12
return int(x)
def clean(text):
    """Normalize whitespace in a block of text.

    Whitespace surrounding each line break (including runs of blank lines)
    is collapsed to a single newline, runs of two or more spaces/tabs are
    collapsed to a single space, and leading/trailing whitespace is removed.

    :param text: the string to normalize.
    :returns: the normalized string.
    """
    # Raw string literals keep the regex escapes from being parsed as
    # (invalid) string escapes; the patterns themselves are unchanged.
    text = re.sub(r'\s*\n\s*', '\n', text)
    text = re.sub(r'[ \t]{2,}', ' ', text)
    return text.strip()
def text_length(i):
    """Return the length of the node's text content after clean()."""
    raw_text = i.text_content() or ""
    normalized = clean(raw_text)
    return len(normalized)
class Unparseable(ValueError):
    """Raised when a summary cannot be extracted from the input."""
class Document:
"""Class to build a etree document out of html."""
TEXT_LENGTH_THRESHOLD = 25
RETRY_LENGTH = 250
def __init__(self, input, **options):
"""Generate the document
:param input: string of the html content.
kwargs:
- attributes:
- debug: output debug messages
- min_text_length:
- retry_length:
- url: will allow adjusting links to be absolute
"""
self.input = input
self.options = defaultdict(lambda: None)
for k, v in options.items():
self.options[k] = v
self.options = options
self.html = None
def _html(self, force=False):
@ -77,7 +107,7 @@ class Document:
def _parse(self, input):
doc = build_doc(input)
doc = html_cleaner.clean_html(doc)
base_href = self.options['url']
base_href = self.options.get('url', None)
if base_href:
doc.make_links_absolute(base_href, resolve_base_href=True)
else:
@ -93,12 +123,17 @@ class Document:
def short_title(self):
return shorten_title(self._html(True))
def summary(self):
def summary(self, document_only=False):
"""Generate the summary of the html docuemnt
:param document_only: return only the div of the document, don't wrap
in html and body tags.
"""
try:
ruthless = True
while True:
self._html(True)
for i in self.tags(self.html, 'script', 'style'):
i.drop_tree()
for i in self.tags(self.html, 'body'):
@ -109,47 +144,64 @@ class Document:
candidates = self.score_paragraphs()
best_candidate = self.select_best_candidate(candidates)
if best_candidate:
article = self.get_article(candidates, best_candidate)
article = self.get_article(candidates, best_candidate,
document_only=document_only)
else:
if ruthless:
logging.debug("ruthless removal did not work. ")
log.debug("ruthless removal did not work. ")
ruthless = False
self.debug("ended up stripping too much - going for a safer _parse")
self.debug(
("ended up stripping too much - "
"going for a safer _parse"))
# try again
continue
else:
logging.debug("Ruthless and lenient parsing did not work. Returning raw html")
log.debug(
("Ruthless and lenient parsing did not work. "
"Returning raw html"))
article = self.html.find('body')
if article is None:
article = self.html
cleaned_article = self.sanitize(article, candidates)
of_acceptable_length = len(cleaned_article or '') >= (self.options['retry_length'] or self.RETRY_LENGTH)
article_length = len(cleaned_article or '')
retry_length = self.options.get(
'retry_length',
self.RETRY_LENGTH)
of_acceptable_length = article_length >= retry_length
if ruthless and not of_acceptable_length:
ruthless = False
continue # try again
# Loop through and try again.
continue
else:
return cleaned_article
except StandardError, e:
#logging.exception('error getting summary: ' + str(traceback.format_exception(*sys.exc_info())))
logging.exception('error getting summary: ' )
log.exception('error getting summary: ')
raise Unparseable(str(e)), None, sys.exc_info()[2]
def get_article(self, candidates, best_candidate):
# Now that we have the top candidate, look through its siblings for content that might also be related.
def get_article(self, candidates, best_candidate, document_only=False):
# Now that we have the top candidate, look through its siblings for
# content that might also be related.
# Things like preambles, content split by ads that we removed, etc.
sibling_score_threshold = max([10, best_candidate['content_score'] * 0.2])
sibling_score_threshold = max([
10,
best_candidate['content_score'] * 0.2])
# create a new html document with a html->body->div
if document_only:
output = fragment_fromstring('<div/>')
else:
output = document_fromstring('<div/>')
best_elem = best_candidate['elem']
for sibling in best_elem.getparent().getchildren():
#if isinstance(sibling, NavigableString): continue#in lxml there no concept of simple text
# in lxml there is no concept of simple text
# if isinstance(sibling, NavigableString): continue
append = False
if sibling is best_elem:
append = True
sibling_key = sibling # HashableElement(sibling)
if sibling_key in candidates and candidates[sibling_key]['content_score'] >= sibling_score_threshold:
if sibling_key in candidates and \
candidates[sibling_key]['content_score'] >= sibling_score_threshold:
append = True
if sibling.tag == "p":
@ -159,11 +211,18 @@ class Document:
if node_length > 80 and link_density < 0.25:
append = True
elif node_length <= 80 and link_density == 0 and re.search('\.( |$)', node_content):
elif node_length <= 80 \
and link_density == 0 \
and re.search('\.( |$)', node_content):
append = True
if append:
# We don't want to append directly to output, but the div
# in html->body->div
if document_only:
output.append(sibling)
else:
output.getchildren()[0].getchildren()[0].append(sibling)
#if output is not None:
# output.append(best_elem)
return output
@ -172,7 +231,9 @@ class Document:
sorted_candidates = sorted(candidates.values(), key=lambda x: x['content_score'], reverse=True)
for candidate in sorted_candidates[:5]:
elem = candidate['elem']
self.debug("Top 5 : %6.3f %s" % (candidate['content_score'], describe(elem)))
self.debug("Top 5 : %6.3f %s" % (
candidate['content_score'],
describe(elem)))
if len(sorted_candidates) == 0:
return None
@ -180,7 +241,6 @@ class Document:
best_candidate = sorted_candidates[0]
return best_candidate
def get_link_density(self, elem):
link_length = 0
for i in elem.findall(".//a"):
@ -191,10 +251,10 @@ class Document:
return float(link_length) / max(total_length, 1)
def score_paragraphs(self, ):
MIN_LEN = self.options.get('min_text_length', self.TEXT_LENGTH_THRESHOLD)
MIN_LEN = self.options.get(
'min_text_length',
self.TEXT_LENGTH_THRESHOLD)
candidates = {}
#self.debug(str([describe(node) for node in self.tags(self.html, "div")]))
ordered = []
for elem in self.tags(self._html(), "p", "pre", "td"):
parent_node = elem.getparent()
@ -205,7 +265,8 @@ class Document:
inner_text = clean(elem.text_content() or "")
inner_text_len = len(inner_text)
# If this paragraph is less than 25 characters, don't even count it.
# If this paragraph is shorter than MIN_LEN (25 characters by default),
# don't even count it.
if inner_text_len < MIN_LEN:
continue
@ -214,7 +275,8 @@ class Document:
ordered.append(parent_node)
if grand_parent_node is not None and grand_parent_node not in candidates:
candidates[grand_parent_node] = self.score_node(grand_parent_node)
candidates[grand_parent_node] = self.score_node(
grand_parent_node)
ordered.append(grand_parent_node)
content_score = 1
@ -228,13 +290,18 @@ class Document:
if grand_parent_node is not None:
candidates[grand_parent_node]['content_score'] += content_score / 2.0
# Scale the final candidates score based on link density. Good content should have a
# relatively small link density (5% or less) and be mostly unaffected by this operation.
# Scale the final candidates score based on link density. Good content
# should have a relatively small link density (5% or less) and be
# mostly unaffected by this operation.
for elem in ordered:
candidate = candidates[elem]
ld = self.get_link_density(elem)
score = candidate['content_score']
self.debug("Candid: %6.3f %s link density %.3f -> %6.3f" % (score, describe(elem), ld, score*(1-ld)))
self.debug("Candid: %6.3f %s link density %.3f -> %6.3f" % (
score,
describe(elem),
ld,
score * (1 - ld)))
candidate['content_score'] *= (1 - ld)
return candidates
@ -274,8 +341,8 @@ class Document:
}
def debug(self, *a):
#if self.options['debug']:
logging.debug(*a)
if self.options.get('debug', False):
log.debug(*a)
def remove_unlikely_candidates(self):
for elem in self.html.iter():
@ -289,10 +356,14 @@ class Document:
def transform_misused_divs_into_paragraphs(self):
for elem in self.tags(self.html, 'div'):
# transform <div>s that do not contain other block elements into <p>s
#FIXME: The current implementation ignores all descendants that are not direct children of elem
# This results in incorrect results in case there is an <img> buried within an <a> for example
if not REGEXES['divToPElementsRe'].search(unicode(''.join(map(tostring, list(elem))))):
# transform <div>s that do not contain other block elements into
# <p>s
#FIXME: The current implementation ignores all descendants that
# are not direct children of elem
# This results in incorrect results in case there is an <img>
# buried within an <a> for example
if not REGEXES['divToPElementsRe'].search(
unicode(''.join(map(tostring, list(elem))))):
#self.debug("Altering %s to p" % (describe(elem)))
elem.tag = "p"
#print "Fixed element "+describe(elem)
@ -327,7 +398,8 @@ class Document:
yield e
def sanitize(self, node, candidates):
MIN_LEN = self.options.get('min_text_length', self.TEXT_LENGTH_THRESHOLD)
MIN_LEN = self.options.get('min_text_length',
self.TEXT_LENGTH_THRESHOLD)
for header in self.tags(node, "h1", "h2", "h3", "h4", "h5", "h6"):
if self.class_weight(header) < 0 or self.get_link_density(header) > 0.33:
header.drop_tree()
@ -357,7 +429,8 @@ class Document:
counts[kind] = len(el.findall('.//%s' % kind))
counts["li"] -= 100
content_length = text_length(el) # Count the text length excluding any surrounding whitespace
# Count the text length excluding any surrounding whitespace
content_length = text_length(el)
link_density = self.get_link_density(el)
parent_node = el.getparent()
if parent_node is not None:
@ -389,10 +462,12 @@ class Document:
reason = "too short content length %s without a single image" % content_length
to_remove = True
elif weight < 25 and link_density > 0.2:
reason = "too many links %.3f for its weight %s" % (link_density, weight)
reason = "too many links %.3f for its weight %s" % (
link_density, weight)
to_remove = True
elif weight >= 25 and link_density > 0.5:
reason = "too many links %.3f for its weight %s" % (link_density, weight)
reason = "too many links %.3f for its weight %s" % (
link_density, weight)
to_remove = True
elif (counts["embed"] == 1 and content_length < 75) or counts["embed"] > 1:
reason = "<embed>s with too short content length, or too many <embed>s"
@ -451,7 +526,7 @@ class Document:
el.drop_tree()
for el in ([node] + [n for n in node.iter()]):
if not (self.options['attributes']):
if not self.options.get('attributes', None):
#el.attrib = {} #FIXME:Checkout the effects of disabling this
pass
@ -484,17 +559,17 @@ class HashableElement():
def __getattr__(self, tag):
return getattr(self.node, tag)
def main():
from optparse import OptionParser
parser = OptionParser(usage="%prog: [options] [file]")
parser.add_option('-v', '--verbose', action='store_true')
parser.add_option('-u', '--url', help="use URL instead of a local file")
parser.add_option('-u', '--url', default=None, help="use URL instead of a local file")
(options, args) = parser.parse_args()
if not (len(args) == 1 or options.url):
parser.print_help()
sys.exit(1)
logging.basicConfig(level=logging.INFO)
file = None
if options.url:
@ -504,7 +579,9 @@ def main():
file = open(args[0], 'rt')
enc = sys.__stdout__.encoding or 'utf-8'
try:
print Document(file.read(), debug=options.verbose).summary().encode(enc, 'replace')
print Document(file.read(),
debug=options.verbose,
url=options.url).summary().encode(enc, 'replace')
finally:
file.close()

@ -0,0 +1,762 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=iso-8859-1">
<a href="/baseball/mlb/teams/tigers/">
<title>Detroit Tigers vs. Kansas City Royals - Preview - April 16, 2012</title></a><meta name="description" content="Tigers-Royals preview for game played on April 16, 2012">
<meta name="keywords" content="Detroit Tigers, Kansas City Royals, preview, mlb, baseball, si.com">
<script type="text/javascript">
var SPORTID = "MLB";
var PATH = "/baseball/mlb/scoreboards/2012/04/16/";
var FEEDNAME = "scoreboard.dat";
isViewcast = true;
var searchString = document.location.href;
</script>
<link rel="stylesheet" type="text/css" href="http://i.cdn.turner.com/si/.e/css/pkg/global_41/129.css"/>
<script type="text/javascript" language="JavaScript" src="http://i.cdn.turner.com/si/.e/js/4.1/global/lib/jquery-1.5.2.min.js"></script>
<script language="JavaScript" type="text/javascript" src="http://i.cdn.turner.com/si/.e/js/pkg/global/593.js"></script>
<script src="http://img.timeinc.net/shared/static/js/tii_ads.js"></script><script>var adConfig=new TiiAdConfig('3475.si2');adConfig.setRevSciTracking(true);</script>
<!--[if IE 9]>
<link rel="stylesheet" type="text/css" href="http://i.cdn.turner.com/si/.e/css/4.1/ie9.css" />
<![endif]-->
<link rel="stylesheet" type="text/css" href="http://i.cdn.turner.com/si/.element/css/4.1/gameflash.css"/>
<link rel="stylesheet" type="text/css" href="http://i.cdn.turner.com/si/.element/css/4.1/miniscores.css"/>
<script language="javascript" type="text/javascript">
function hidediv() {
if (document.getElementById) { // DOM3 = IE5, NS6
document.getElementById('cnngCommentsBox').className = 'cnngCommentsBoxOff';
}
else {
if (document.layers) { // Netscape 4
document.cnngCommentsBox.className = 'cnngCommentsBoxOff';
}
else { // IE 4
document.all.cnngCommentsBox.className = 'cnngCommentsBoxOff';
}
}
}
function showdiv() {
if (document.getElementById) { // DOM3 = IE5, NS6
document.getElementById('cnngCommentsBox').className = 'cnngCommentsBox';
}
else {
if (document.layers) { // Netscape 4
document.cnngCommentsBox.className = 'cnngCommentsBox';
}
else { // IE 4
document.all.cnngCommentsBox.className = 'cnngCommentsBox';
}
}
}
function siVideoBegin(cvpInstance, videoId) { }
function siVideoPlay(cvpInstance, videoId) {
var cvpData = cvpInstance.getContentEntry(videoId);
var cvpObject = window.JSON.parse(cvpData);
jQuery('#cnnCVPRecapDetails').show();
jQuery('#cvpHeadline').html(cvpObject.headline);
jQuery('#cvpDescription').html(cvpObject.description);
jQuery('#cvpSource').html(cvpObject.source);
}
function siVideoPlayHead(cvpInstance, playheadTime, totalDuration) { }
function siVideoAdStarted(cvpInstance, videoId) { }
function siVideoTrackingAdCountdown(seconds) { }
function siVideoComplete(cvpInstance, videoId) { }
function siVideoPause(cvpInstance, videoId, paused) { }
function siVideoSeek() { }
</script>
<script language="JavaScript" src="/.element/js/4.1/ads/sasd_ads.js"></script>
<script src="http://i.cdn.turner.com/si/.element/js/4.1/global/lib/iframe_ad_factory.js"></script><script>iframeAdFactory.url = '/si_adspaces/4.0/iframe.html';
window.setInterval(function(){ iframeAdFactory.refresh() }, 45000);</script>
<script type="text/javascript">
var adFactory = new TiiAdFactory(adConfig, "mlb/gameflashpage");
iframeAdFactory.queryString = 'TiiAdConfig=3475.si2&adConfigPairs=' + '&TiiAdFactory=' + encodeURIComponent('mlb/gameflashpage') + '&adFactoryPairs=' + '&paramPairs=' + encodeURIComponent('sport=mlb');
if (TiiAdsIsDebugMode()) { iframeAdFactory.queryString += '&debugads=y'; }
</script>
<link rel="stylesheet" type="text/css" href="http://z.cdn.turner.com/si/.element/css/4.1/gameflash_mlb.css"/>
<script type="text/javascript" src="http://z.cdn.turner.com/si/.element/js/4.1/global/lib/jquery-1.4.2.min.js"></script>
<link rel="stylesheet" type="text/css" href="http://z.cdn.turner.com/si/.element/ssi/scoreboards/4.2/css/scoreticker-master.css"/>
<script type="text/javascript" src="http://z.cdn.turner.com/si/.element/ssi/gameflash/4.2/football/nfl/js/jquery.jsonp-2.1.4.min.js"></script>
<script type="text/javascript" src="http://z.cdn.turner.com/si/.element/ssi/scoreboards/4.2/js/scoreticker-master.js"></script>
<script type="text/javascript" src="http://z.cdn.turner.com/si/.element/ssi/scoreboards/4.2/js/scoreticker-mlb.js"></script>
</head>
<body>
<!--[if IE 6]><div class="ie"><div class="ie6"><![endif]--><!--[if IE 7]><div class="ie"><div class="ie7"><![endif]--><!--[if
IE 8]><div class="ie"><div class="ie8"><![endif]-->
<div class="cnnPage">
<!-- start contentHeader-->
<style>
DIV.cnnSearch { padding:5px 0; }
DIV.cnnSearch DIV.cnnRight { padding:4px 0; }
DIV.cnnSearch DIV.cnnLeft { margin:0;padding:0; }
DIV.cnnSearch DIV.cnnLeft LI { float:left;margin:0;padding:0 5px 0 0; }
DIV.cnnSearch DIV.cnnLeft LI A { display:block;margin:0;padding:0; }
DIV.cnnSearch DIV.cnnLeft LI IMG { vertical-align:bottom; }
DIV.cnnSearch DIV.cnnLeft LI DL { margin:0;padding:0;position:relative;z-index:999999; }
DIV.cnnSearch DIV.cnnLeft LI DT { margin:0;padding:0; }
DIV.cnnSearch DIV.cnnLeft LI DD { left:-999em;margin:0;padding:0 3px 0 1px;position:absolute;top:23px; }
DIV.cnnSearch DIV.cnnLeft LI DL.cnnOver DD,
DIV.cnnSearch DIV.cnnLeft LI DL:hover DD { left:auto; }
DIV.cnnBanner { height:auto; }
DIV.cnnBannerSection DIV.cnnLeft { width:auto; }
DIV.cnnBannerSection DIV.cnnLeft A { display:inline;height:auto;width:auto; }
DIV.cnnBanner { background:transparent url('http://i.cdn.turner.com/si/.element/img/4.1/sect/global/topper.gif') no-repeat top right;position:relative;text-align:left;width:1000px; }
.ie6 DIV.cnnBanner { width:1000px; }
DIV.cnnBanner DIV IMG { display:block; }
DIV.cnnBannerSection { height:99px;position:absolute;left:243px;top:0px;width:757px; }
DIV.cnnBannerSection TD.col0 { display:none; }
DIV.cnnBannerSection DIV.cnn_border { display:none; }
DIV.cnnBannerSection IMG { display:inline;float:left; }
DIV.cnnBannerSection DIV.cnnLeft { float:left; }
DIV.cnnBannerSection DIV.cnnLeft IMG { float:none; }
DIV.cnnBannerSection DIV.cnnRight { float:right;margin:8px 6px 0 0; }
DIV.cnnBannerSection DIV.cnn_header { color:#000;font:bold 50px georgia;line-height:58px;padding:6px 10px 0 0; }
DIV.cnnBannerSection DIV.cnn_header SPAN { font-size:10px;color:#ccc; }
DIV.cnnBannerSection DIV.cnn_header A { color:#000; }
DIV.cnnBannerSection DIV.cnn_header UL { color:#ccc;float:right;font-size:10px;line-height:12px;margin-top:36px; }
.ie DIV.cnnBannerSection DIV.cnn_header UL { margin-top:-21px; }
DIV.cnnBannerSection DIV.cnn_header UL LI { border-left:1px solid #ccc;float:left;padding:0 4px; }
DIV.cnnBannerSection DIV.cnn_header UL LI#cnnItem0 { border:0; }
DIV.cnnBannerSection DIV.cnn_header UL LI#cnnItem2 DIV.cnn_more { font:normal 9px arial; }
DIV.cnnBannerSection DIV.cnn_header UL LI#cnnItem2 DIV.cnn_more A { font:normal 9px arial; }
DIV.cnnBannerSection DIV.cnn_header UL LI DIV.cnn_rollover { background-image:url('http://i.cdn.turner.com/si/.e1d/img/4.0/global/pixels/blank_pixel.gif');display:none;padding:10px 0 9px 0;left:103px;position:absolute;width:654px; }
.ie DIV.cnnBannerSection DIV.cnn_header UL LI DIV.cnn_rollover { top:55px; }
DIV.cnnBannerSection DIV.cnn_header UL LI.cnnOver .cnn_rollover,
DIV.cnnBannerSection DIV.cnn_header UL LI:hover .cnn_rollover { display:block; }
DIV.cnnBannerSection DIV.cnn_more { color:#2e373c;font-size:10px;padding:2px 0 0 0; }
DIV.cnnBannerSection DIV.cnn_more A { color:#fff;font-weight:bold; }
DIV.cnnBannerSection DIV.cnn_more A:hover { color:#e7e7e7; }
DIV.cnnBannerSection DIV.cnn_more DIV { display:none;color:#ccc;line-height:12px; }
DIV.cnnBannerSection DIV.cnn_more DIV SPAN A { font:9px arial;font-weight:normal; }
DIV.cnnBannerSection DIV.cnn_header DIV.cnn_more A { font-family:arial; }
DIV.cnnGameScores { background:#6f7f8b;border-bottom:11px solid #384d5e; }
</style>
<!-- start personalize -->
<div class="cnnPersonalize"><div><div><script>cnn_writePresonalizeBar();</script></div></div></div>
<!-- end personalize -->
<!-- start searchbar -->
<div class="cnnSearch">
<div class="cnnLeft"><ul>
<li class="cnnItem0" id="cnnCM1"><dl><script type="text/javascript">
/* script for 50/50 split */
/*var min=1;
var max=2;
x = Math.floor(Math.random() * (max - min + 1)) + min;
if(x/2 == 1) {
document.write('<dt><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1006340.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe0"><img src="http://i.cdn.turner.com/si/2011_images/cm/WS11_btn_champ_STL.png" alt="Get the Cardinals Championship Package" title="Get the Cardinals Championship Package"/></a></dt>');
document.write('<dd><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1006340.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe1"><img src="http://i.cdn.turner.com/si/2011_images/cm/WS11_dropdown_STL.png" alt="Get the Cardinals Championship Package" title="Get the Cardinals Championship Package"/></a></dd>');
} else {
document.write('<dt><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1007180.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe0"><img src="http://i.cdn.turner.com/si/2011_images/cm/EA-N4S-TheRun-btn.png" alt="Get Need for Speed 12 FREE" title="Get Need for Speed 12 FREE"/></a></dt>');
document.write('<dd><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1007180.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe1"><img src="http://i.cdn.turner.com/si/2011_images/cm/EA-N4S-TheRun-SI-dropdown.jpg" alt="Get Need for Speed 12 FREE" title="Get Need for Speed 12 FREE"/></a></dd>');
}
*/
</script>
<!--Kentucky-->
<dt><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1009459.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe0"><img src="http://i.cdn.turner.com/si/2012_images/cm/si-btn-champ-kentucky.png" alt="Get the Wildcats Championship Package" title="Get the Wildcats Championship Package"/></a></dt>
<dd><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1009459.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe1"><img src="http://i.cdn.turner.com/si/2012_images/cm/si-dropdown-kentucky.png" alt="Get the Wildcats Championship Package" title="Get the Wildcats Championship Package"/></a></dd>
<!--original generic sub buttons, changed on 10.26.11 for world series-->
<!--<dt><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1005085.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe0"><img src="http://i.cdn.turner.com/si/2011_images/cm/si-btn-EA-MADDEN12.png" alt="Get EA Sports Madden NFL 12 Free!" title="Get EA Sports Madden NFL 12 Free!"/></a></dt>
<dd><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1005085.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe1"><img src="http://i.cdn.turner.com/si/2011_images/cm/si-dropdown-EA-MADDEN12.jpg" alt="Get EA Sports Madden NFL 12 Free!" title="Get EA Sports Madden NFL 12 Free!"/></a></dd>
-->
<script><!--
/*
if (cnnPage.isHomepage) {
var button = $e('cnn_cm_subscribe0');
button.href = 'https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1004340.html';
button = $e('cnn_cm_subscribe1');
button.href = 'https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1004340.html';
}
*/
//--></script>
</dl></a></li>
<li class="cnnItem1"><dl><script type="text/javascript">
var min=1;
var max=2;
x = Math.floor(Math.random() * (max - min + 1)) + min;
/*turning off 50/50 for now*/
/*if(x/2 == 1) {
document.write('<dt><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002346.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe2"><img src="http://i.cdn.turner.com/si/.element/img/4.1/global/cm/button_subscribe_si_red.png" alt="Subscribe to SI" title="Subscribe to SI"/></a></dt>');
document.write('<dd><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002346.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe4"><img src="http://i.cdn.turner.com/si/.element/img/4.1/global/cm/dropdown_subscribe_si_red.png" alt="Subscribe to SI" title="Subscribe to SI"/></a></dd>');
} else {*/
document.write('<dt><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002346.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe2"><img src="http://i.cdn.turner.com/si/.element/img/4.1/global/cm/button_subscribe_si_red.png" alt="Subscribe to SI" title="Subscribe to SI"/></a></dt>');
document.write('<dd style="margin-left:-79px"><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002346.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe4"><img src="http://i.cdn.turner.com/si/2012_images/cm/bn_2osi16579_290x162_v1.png" alt="Subscribe to SI" title="Subscribe to SI"/></a></dd>');
//}
</script>
</dl></li>
<li class="cnnItem2"><dl><!--Default ROS
<a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1001406.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe3"><img src="http://i.cdn.turner.com/si/2012_images/cm/si-btn3_170x30_sigift.png" alt="Give the Gift of SI" title="Give the Gift of SI"/></a>
-->
<script type="text/javascript">
/*var min=1;
var max=2;
x = Math.floor(Math.random() * (max - min + 1)) + min;
if(x/2 == 1) {
document.write('<dt><a href="https://subscription.si.com/storefront/Give-the-Gift-of-Sports-Illustrated/site/si-donor0411jacket.html?xid=sirosheader&link=1001406" target="_blank" rel="nofollow" id="cnn_cm_subscribe0"><img src="http://i.cdn.turner.com/si/2011_images/cm/170x30.png" alt="Give the Gift of SI" title="Give the Gift of SI"/></a></dt>');
document.write('<dd><a href="https://subscription.si.com/storefront/Give-the-Gift-of-Sports-Illustrated/site/si-donor0411jacket.html?xid=sirosheader&link=1001406" target="_blank" rel="nofollow" id="cnn_cm_subscribe1"><img src="http://i.cdn.turner.com/si/2011_images/cm/170X110.jpg" alt="Give the Gift of SI" title="Give the Gift of SI"/></a></dd>');
} else {
document.write('<dt><a href="http://www.si.com/swim2012" target="_blank" rel="nofollow" id="cnn_cm_subscribe0"><img src="http://i.cdn.turner.com/si/2012_images/cm/si-btn_swim.png" alt="Swimsuit 2012" title="Swimsuit 2012"/></a></dt>');
document.write('<dd><a href="http://www.si.com/swim2012" target="_blank" rel="nofollow" id="cnn_cm_subscribe1"><img src="http://i.cdn.turner.com/si/2012_images/cm/SWIM_2012_dropdown.png" alt="Swimsuit 2012" title="Swimsuit 2012"/></a></dd>');
*/
</script>
<!--MLB2K 2012-->
<dt><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1009469.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe0"><img src="http://i.cdn.turner.com/si/2012_images/cm/si-btn3_MLB2K12.png" alt="Get MLB 2K 12 FREE" title="Get MLB 2K 12 FREE"/></a></dt>
<dd><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1009469.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe1"><img src="http://i.cdn.turner.com/si/2012_images/cm/si-MLB2K12-dropdown.jpg" alt="Get MLB 2K 12 FREE" title="Get MLB 2K 12 FREE"/></a></dd>
<!--swimsuit 2012-->
<!--
<dt><a href="http://sportsillustrated.cnn.com/swim2012" target="_blank" rel="nofollow" id="cnn_cm_subscribe0"><img src="http://i.cdn.turner.com/si/2012_images/cm/si-btn_swim.png" alt="Swimsuit 2012" title="Swimsuit 2012"/></a></dt>
<dd><a href="http://sportsillustrated.cnn.com/swim2012" target="_blank" rel="nofollow" id="cnn_cm_subscribe1"><img src="http://i.cdn.turner.com/si/2012_images/cm/SWIM_2012_dropdown.png" alt="Swimsuit 2012" title="Swimsuit 2012"/></a></dd>
-->
</dl></li>
</ul>
</div>
<div class="cnnRight"><form method="get" action="http://sportsillustrated.cnn.com/search/" name="cm_search"><input type="text" name="text" class="cnnLeft"/><input type="image" src="http://i.cdn.turner.com/si/.element/img/4.1/global/search.gif" alt="Search" title="Search" class="cnnRight"/></form></div>
</div>
<!-- end searchbar -->
<!-- start banner -->
<div class="cnnBanner">
<div><a href="/"><img src="http://i.cdn.turner.com/si/.element/img/4.1/sect/global/logo2.png" alt="SI.com Home" title="SI.com Home"/></a>
</div>
<div class="cnnBannerSection">
<div class="cnnLeft"><a href="/baseball/mlb/"><img src="http://i.cdn.turner.com/si/.element/img/4.0/sect/baseball/mlb/icon.jpg"/></a></div>
<div class="cnn_header"><a href="/baseball/mlb/">MLB GAMEFLASH</a></div>
<div class="cnn_more" style="font-size:9px;"><a href="/baseball/mlb/scoreboards/today/">Scores</a> | <a href="/baseball/mlb/teams/">Teams</a> | <a href="/baseball/mlb/players/">Players</a> | <a href="/fantasy/player_news/mlb/">Player News</a> | <a href="/baseball/mlb/standings/">Standings</a> | <a href="/baseball/mlb/probables/today/">Probables</a> | <a href="/baseball/mlb/schedules/weekly/today/">Schedules</a> | <a href="/baseball/mlb/stats/">Stats</a> | <a href="/baseball/mlb/transactions/">Transactions</a> | <a href="/baseball/mlb/injuries/">Injuries</a> | <a href="http://www.ticketcity.com/mlb-tickets.html " target="_blank" rel="nofollow">Tickets</a> | <a href="http://mlb.mlb.com/mlb/subscriptions/index.jsp?product=si&vbID=simlbtv_test" target="_blank" rel="nofollow">MLB.TV</a>
</div>
</div>
</div>
<div class="cnnClear"></div>
<!-- end banner -->
<style>
/*
DIV.cnnTopnav LI A { color:#000;display:block;padding:0 16px 0 16px!important; }
DIV.cnnTopnav LI A { color:#000;display:block;padding:0 23px 0 22px!important; }
*/
DIV.cnnTopnav LI A { color:#000;display:block;padding:0 11px 0 11px!important; }
DIV.cnnTopnav LI.cnnFirst { padding-left:0px; }
</style>
<div class="cnnTopnav">
<ul>
<li class="cnnFirst"><a href="/extramustard/?eref=sinav">EXTRA MUSTARD</a></li>
<li><a href="http://www.fannation.com/?eref=sinav">FANNATION</a></li>
<li><a href="/multimedia/photo_gallery/?eref=sinav">PHOTOS</a></li>
<li><a href="/swimsuit/?eref=sinav">SWIMSUIT</a></li>
<li><a href="/fantasy/?eref=sinav">FANTASY</a></li>
<li><a href="/magazine/sportsman/?eref=sinav">SPORTSMAN</a></li>
<li><a href="http://www.sportsillustratedeverywhere.com/">MAGAZINE</a></li>
<li><a href="/sifk/?eref=sinav">SI KIDS</a></li>
<li><a href="/highschool/?eref=sinav">HIGH SCHOOL</a></li>
<li><a href="/behindthemic/?eref=sinav">BEHIND THE MIC</a></li>
<li><a href="http://www.twackle.com/" target="_blank" rel="nofollow">TWACKLE</a></li>
<!--<li><a href="http://www.maxpreps.com/national/national.htm?eref=sinav" target="_blank" rel="nofollow">MAXPREPS</a></li>-->
</ul>
</div>
<!-- end topnav -->
<style>
.ie6 #cnnBotnav LI#cnnBotnav0 { width:49px; } /* NFL */
.ie6 #cnnBotnav LI#cnnBotnav1 { width:150px; } /* COLLEGE FOOTBALL */
.ie6 #cnnBotnav LI#cnnBotnav2 { width:50px; } /* MLB */
.ie6 #cnnBotnav LI#cnnBotnav3 { width:51px; } /* NBA */
.ie6 #cnnBotnav LI#cnnBotnav4 { width:101px; } /* COLLEGE BB */
.ie6 #cnnBotnav LI#cnnBotnav5 { width:58px; } /* GOLF */
.ie6 #cnnBotnav LI#cnnBotnav6 { width:50px; } /* NHL */
.ie6 #cnnBotnav LI#cnnBotnav7 { width:74px; } /* RACING */
.ie6 #cnnBotnav LI#cnnBotnav8 { width:74px; } /* SOCCER */
.ie6 #cnnBotnav LI#cnnBotnav9 { width:121px; } /* MMA & BOXING */
.ie6 #cnnBotnav LI#cnnBotnav11 { width:73px; } /* TENNIS */
.ie6 #cnnBotnav LI#cnnBotnav12 { width:63px; } /* MORE */
.ie6 #cnnBotnav LI#cnnBotnav13 { width:74px; } /* VIDEO */
#cnnBotnav LI#cnnBotnav0 STRONG { width:49px; } /* NFL */
#cnnBotnav LI#cnnBotnav1 STRONG { width:150px; } /* COLLEGE FOOTBALL */
#cnnBotnav LI#cnnBotnav2 STRONG { width:50px; } /* MLB */
#cnnBotnav LI#cnnBotnav3 STRONG { width:51px; } /* NBA */
#cnnBotnav LI#cnnBotnav4 STRONG { width:101px; } /* COLLEGE BB */
#cnnBotnav LI#cnnBotnav5 STRONG { width:58px; } /* GOLF */
#cnnBotnav LI#cnnBotnav6 STRONG { width:50px; } /* NHL */
#cnnBotnav LI#cnnBotnav7 STRONG { width:74px; } /* RACING */
#cnnBotnav LI#cnnBotnav8 STRONG { width:74px; } /* SOCCER */
#cnnBotnav LI#cnnBotnav9 STRONG { width:121px; } /* MMA & BOXING */
#cnnBotnav LI#cnnBotnav11 STRONG { width:73px; } /* TENNIS */
#cnnBotnav LI#cnnBotnav12 STRONG { width:63px; } /* MORE */
#cnnBotnav LI#cnnBotnav13 STRONG { width:74px; } /* VIDEO */
/* realignment */
#cnnBotnav LI#cnnBotnav11:hover UL,
#cnnBotnav LI#cnnBotnav11 LI.cnnOver UL { margin-left:0; } /* width of subnav minus width of TENNIS minus width of MORE minus 2 lines */
#cnnBotnav LI#cnnBotnav12:hover UL,
#cnnBotnav LI#cnnBotnav12 LI.cnnOver UL { margin-left:-41px; } /* width of subnav minus width of MORE minus 1 line */
#cnnBotnav LI#cnnBotnav13:hover UL,
#cnnBotnav LI#cnnBotnav13 LI.cnnOver UL { margin-left:-93px; width:168px; } /* width of subnav minus width of MORE minus 1 line */
#cnnBotnav LI#cnnBotnav13 UL LI { width:168px; }
</style>
<!-- start botnav -->
<div class="cnnBotnav">
<div>
<ul id="cnnBotnav" style="height:29px;overflow:hidden;">
<li id="cnnBotnav0" nav="nfl">
<a href="/football/nfl/?eref=sinav"><strong>NFL</strong></a>
</li>
<li id="cnnBotnav1" nav="ncaaf">
<a href="/football/ncaa/?eref=sinav"><strong>COLLEGE FOOTBALL</strong></a>
</li>
<li id="cnnBotnav2" nav="mlb">
<a href="/baseball/mlb/?eref=sinav"><strong>MLB</strong></a>
</li>
<li id="cnnBotnav3" nav="nba">
<a href="/basketball/nba/?eref=sinav"><strong>NBA</strong></a>
</li>
<li id="cnnBotnav4" nav="ncaabb">
<a href="/basketball/ncaa/?eref=sinav"><strong>COLLEGE BB</strong></a>
</li>
<li id="cnnBotnav5" nav="golf">
<a href="http://www.golf.com/?eref=sinav"><strong>GOLF</strong></a>
</li>
<li id="cnnBotnav6" nav="nhl">
<a href="/hockey/nhl/?eref=sinav"><strong>NHL</strong></a>
</li>
<li id="cnnBotnav7" nav="racing">
<a href="/racing/?eref=sinav"><strong>RACING</strong></a>
</li>
<li id="cnnBotnav8" nav="soccer">
<a href="/soccer/?eref=sinav"><strong>SOCCER</strong></a>
</li>
<li id="cnnBotnav9" nav="boxmma">
<a href="/mma/?eref=sinav"><strong>MMA &amp; BOXING</strong></a>
</li>
<li id="cnnBotnav11" nav="tennis">
<a href="/tennis/?eref=sinav"><strong>TENNIS</strong></a>
</li>
<li id="cnnBotnav12" nav="more">
<a href="/more/?eref=sinav"><strong>MORE</strong></a>
</li>
<li id="cnnBotnav13" nav="video">
<a href="/video/?eref=sinav"><strong>VIDEO</strong></a>
</li>
</ul>
</div>
</div>
<!-- end botnav -->
<div class="cnnViewerAd"><script type="text/javascript">iframeAdFactory.getAd('i_728x90', 728, 90, new Array('728x90','101x1'), true);</script></div>
<!-- start scoreboard ticker -->
<div id="scoreticker" class="stMLB">
<div id="stScrollWrap">
<a href="" class="stScrollControl left disabled"></a>
<a href="" class="stScrollControl right"></a>
<div id="stScroller"></div>
</div>
</div>
<!-- end scoreboard ticker -->
<!-- end contentHeader-->
<!-- start scoreboard -->
<div class="cnngScoreboardNoLastPlay">
<div class="cnngScoreboard">
<div class="cnnLeft">
<div>&nbsp;
</div>
<table border="0" cellpadding="0" cellspacing="0">
<tr class="cnnRow0">
<td class="cnnCol0">&nbsp;</td>
<td class="cnnCol1">1</td>
<td class="cnnCol2">2</td>
<td class="cnnCol3">3</td>
<td class="cnnCol4">4</td>
<td class="cnnCol5">5</td>
<td class="cnnCol6">6</td>
<td class="cnnCol7">7</td>
<td class="cnnCol8">8</td>
<td class="cnnCol9">9</td>
<td class="cnnColR">R</td>
<td class="cnnColH">H</td>
<td class="cnnColE">E</td>
</tr>
<tr class="cnnRow1">
<td class="cnnCol0"><a href="/baseball/mlb/teams/tigers/">TIGERS</a></td>
<td class="cnnCol1">&nbsp;</td>
<td class="cnnCol2">&nbsp;</td>
<td class="cnnCol3">&nbsp;</td>
<td class="cnnCol4">&nbsp;</td>
<td class="cnnCol5">&nbsp;</td>
<td class="cnnCol6">&nbsp;</td>
<td class="cnnCol7">&nbsp;</td>
<td class="cnnCol8">&nbsp;</td>
<td class="cnnCol9">&nbsp;</td>
<td class="cnnColR">&nbsp;</td>
<td class="cnnColH">&nbsp;</td>
<td class="cnnColE">&nbsp;</td>
</tr>
<tr class="cnnRow2">
<td class="cnnCol0"><a href="/baseball/mlb/teams/royals/">ROYALS</a></td>
<td class="cnnCol1">&nbsp;</td>
<td class="cnnCol2">&nbsp;</td>
<td class="cnnCol3">&nbsp;</td>
<td class="cnnCol4">&nbsp;</td>
<td class="cnnCol5">&nbsp;</td>
<td class="cnnCol6">&nbsp;</td>
<td class="cnnCol7">&nbsp;</td>
<td class="cnnCol8">&nbsp;</td>
<td class="cnnCol9">&nbsp;</td>
<td class="cnnColR">&nbsp;</td>
<td class="cnnColH">&nbsp;</td>
<td class="cnnColE">&nbsp;</td>
</tr>
</table>
</div>
<div class="cnnRight">
<ol>
<li class="cnnItem4">8:10 PM ET
</li>
</ol>
<ul>
<li class="cnnItem0"><strong>Tigers</strong><a href="/baseball/mlb/players/7590/"><img src="http://i.cdn.turner.com/si/.e1d/img/4.0/global/baseball/mlb/players/7590_small.jpg" border="0" width="50" height="76" alt="Verlander" title="Verlander"></a><a href="/baseball/mlb/players/7590/">
<div class="cnnLine0">Verlander</div>
<div class="cnnLine4">0-1</div>
<div class="cnnLine5">2.2&nbsp;ERA</div>
<div class="cnnLine6">&nbsp;</div>
<div class="cnnLine7">&nbsp;</div></a></li>
<li class="cnnItem1"><strong>Royals</strong><a href="/baseball/mlb/players/8932/"><img src="http://i.cdn.turner.com/si/.e1d/img/4.0/global/baseball/mlb/players/8932_small.jpg" border="0" width="50" height="76" alt="Duffy" title="Duffy"></a><a href="/baseball/mlb/players/8932//">
<div class="cnnLine0">Duffy</div>
<div class="cnnLine4">1-0</div>
<div class="cnnLine5">0&nbsp;ERA</div>
<div class="cnnLine6">&nbsp;</div>
<div class="cnnLine7">&nbsp;</div></a></li>
</ul>
</div>
</div>
</div>
<!-- end scoreboard -->
<!-- start navbar -->
<div class="cnngNavbar">
<table border="0" cellpadding="0" cellspacing="0">
<tr>
<td class="cnnCol0"><span>PREVIEW</span></td>
<td class="cnnCol0"><a href="40630_matchup.html">MATCHUP</a></td></li>
<td class="cnnCol3"><a href="40630_fancomment.html">FAN COMMENTS</a></td>
</tr>
</table>
</div>
<!-- end navbar -->
<!-- start content -->
<div class="cnngContent">
<div class="cnngPreview">
<div class="cnnLeft">
<!-- REAPFINDREPLACE:20120515:/.element/ssi/story/4.1/wires/ap/expired_story.html:/baseball/mlb/gameflash/2012/04/16/40630_preview.html-->
<h1>Tigers-Royals Preview</h1>
<p>
<span class="cnnDataLinked"><a href="/baseball/mlb/players/7590/index.html">Justin Verlander</a></span>
has pitched well in each of his first two starts, though he doesn't have a win to show for those efforts.
</p>
<p>
He hasn't had much trouble earning victories against the
<span class="cnnDataLinked"><a href="/baseball/mlb/teams/royals/index.html">Kansas City Royals</a></span>
.
</p>
<p>
Verlander looks to continue his mastery of the Royals when the
<span class="cnnDataLinked"><a href="/baseball/mlb/teams/tigers/index.html">Detroit Tigers</a></span>
visit Kauffman Stadium in the opener of a three-game series Monday night.
</p>
<p>
The reigning AL
<span class="cnnDataLinked"><a href="/baseball/mlb/players/49534/index.html">Cy Young</a></span>
winner and MVP had a 2-0 lead through eight innings in both of his outings, but the Tigers weren't able to hold the lead.
</p>
<p>Verlander (0-1, 2.20 ERA) allowed two hits before running into trouble in the ninth against Tampa Bay on Wednesday, getting
charged with four runs in 8 1-3 innings of a 4-2 defeat.
</p>"Once a couple guys got on, really the first time I've cranked it up like that - and lost a little bit of my consistency that
I'd had all day," Verlander said. "It's inexcusable. This loss rests solely on my shoulders."
<p>The right-hander did his part in his opening-day start against Boston on April 5, allowing two hits before the bullpen faltered.
Detroit ended up winning 3-2 with a run in the bottom of the ninth, though Verlander didn't earn a decision.
</p>
<p>That hasn't been the case in his last four starts against the Royals, winning each with a 1.82 ERA. Verlander is 13-2 with
a 2.40 ERA in 19 career starts versus Kansas City, and another win will give him more victories than he has against any other
team. He's also beaten Cleveland 13 times.
</p>
<p>Verlander is 8-2 with a 1.82 ERA lifetime at Kauffman Stadium, where the Royals (3-6) were swept in a three-game series against
the Indians with Sunday's 13-7 loss.
</p>
<p>
<span class="cnnDataLinked"><a href="/baseball/mlb/players/7634/index.html">Billy Butler</a></span>
, who is 14 for 39 (.359) with two homers off Verlander, had an RBI single and is hitting .364 with four doubles and a homer
during a five-game hitting streak.
</p>
<p>
Royals pitchers allowed seven home runs, 17 extra-base hits and 32 runs in the series, and manager
<span class="cnnDataLinked"><a href="/baseball/mlb/players/1716/index.html">Ned Yost</a></span>
turned to outfielder
<span class="cnnDataLinked"><a href="/baseball/mlb/players/7899/index.html">Mitch Maier</a></span>
in the ninth to pitched a scoreless inning Sunday.
</p>"Let's hope it doesn't happen again," Maier said. "I don't like to be put in that situation, but we needed an inning."
<p>
Kansas City will look to bounce back with the help of another solid outing from
<span class="cnnDataLinked"><a href="/baseball/mlb/players/8932/index.html">Danny Duffy</a></span>
(1-0, 0.00), who allowed one hit and struck out eight in six innings of a 3-0 win over Oakland on Tuesday.
</p>
<p>The left-hander will be seeking his first win against Detroit after going 0-2 with a 5.63 ERA in three starts versus the Tigers
as a rookie.
</p>
<p>
<span class="cnnDataLinked"><a href="/baseball/mlb/players/7129/index.html">Gerald Laird</a></span>
was a triple short of the cycle and helped the Tigers (6-3) salvage the finale of a three-game series with a 5-2 victory over
Chicago on Sunday.
</p>
<p>
<span class="cnnDataLinked"><a href="/baseball/mlb/players/8419/index.html">Rick Porcello</a></span>
allowed one run in 7 2-3 innings to give Detroit's starting rotation its first victory.
</p>"All the other starters have pitched well," Porcello said. "It's just the way it's happened so far."
<p>Verlander allowed three runs in seven innings of a 4-3 win over the Royals on Aug. 6, beating Duffy, who gave up three runs
over five.
</p>
<!-- /REAPFINDREPLACE:20120515:/.element/ssi/story/4.1/wires/ap/expired_story.html:/baseball/mlb/gameflash/2012/04/16/40630_preview.html-->
<p class="cnnLast">
<a href="http://biz.stats.com/" target="new">&#169; 2011 STATS LLC <img src="http://i.a.cnn.net/si/images/STATSlogo.gif" align="absmiddle" alt="STATS, Inc"></a>
</p>
</div>
<div class="cnnRight">
<div class="cnngCommentsBox" id="cnngCommentsBox">
<div class="cnngComments">
<div class="cnnHolder">
<div id="fanComments">
<iframe src="http://www.fannation.com/gameday/gameflash_game_comments/320416107?sport_id=2" width="397" height="390" marginwidth="0" scrolling="no" frameborder="0"></iframe>
</div>
</div>
</div>
<div class="cnn_footer">
<div class="cnngToggleOn"><a href="javascript:hidediv();">TURN COMMENTS <span>OFF</span></a></div>
<div class="cnngToggleOff"><a href="javascript:showdiv();">TURN COMMENTS <span>ON</span></a></div>
</div>
</div>
</div>
</div>
</div>
<!-- end content -->
<!-- start contentFooter -->
<div class="cnnWideSL"><script type="text/javascript">adsonar_placementId=1488671;adsonar_pid=769769;adsonar_ps=-1;adsonar_zw=978;adsonar_zh=150;</script><script>cnnad_createSL();</script></div>
<!-- start footerbox -->
<div class="cnnFooterBox">
<div class="cnnHolder">
<div class="cnnRight">
<dl>
<dt><a href="/"><img src="http://i.cdn.turner.com/si/.element/img/4.1/global/footer_logo.jpg" alt="SI.com" title="SI.com"/></a></dt>
<dd><span>Hot Topics:</span> <a href="/2012/writers/peter_king/04/16/countdown/index.html" title="Peter King: MMQB"class="cnnFirst">Peter King: MMQB</a> <a href="http://nhl-red-light.si.com/2012/04/16/mayhem-reigns-in-stanley-cup-playoffs/" title="NHL Playoffs" target="new" >NHL Playoffs</a> <a href="/2012/writers/george_schroeder/04/16/arkansas-football-petrino/index.html" title="Bobby Petrino">Bobby Petrino</a> <a href="/2012/baseball/mlb/04/16/valentine.youkilis.ap/index.html" title="Bobby Valentine">Bobby Valentine</a> <a href="/2012/writers/michael_mccann/04/16/roger.clemens.trial.preview/index.html" title="Roger Clemens">Roger Clemens</a> <a href="/2012/baseball/mlb/04/16/power.rankings/index.html" title="MLB Power Rankings">MLB Power Rankings</a> <a href="/2012/writers/richard_rothschild/04/13/jackie.robinson/index.html" title="Jackie Robinson">Jackie Robinson</a> </dd>
</dl>
<div class="cnnClear"></div>
<ul>
<li><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002348.html" target="_blank" rel="nofollow">SUBSCRIBE TO SI</a></li>
<li><a href="http://www.sportsillustratedeverywhere.com" target="_blank" rel="nofollow">DIGITAL EDITION</a></li>
<li><a href="/mobile/">SI MOBILE</a></li>
<li><a href="/2010/about_us/jobs/">JOBS</a></li>
<li><a href="/sitemap/">SITE MAP</a></li>
<li><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1003862.html" target="_blank" rel="nofollow">GIVE THE GIFT OF SI</a></li>
<li><a href="http://sipictures.com/" target="_blank" rel="nofollow">SI PICTURE SALES</a></li>
<li><a href="http://www.sportsillustratedsnapshot.com" target="_blank" rel="nofollow">PICTURES OF THE DAY</a></li>
<li><a href="/about_us/">ABOUT US</a></li>
<li><a href="http://simediakit.com" target="_blank" rel="nofollow">SI MEDIA KITS</a></li>
<li><a href="http://www.sicovers.com/default.aspx?utm_source=sicom&utm_medium=ftr&utm_campaign=icrefer&xid=siftr" target="_blank" rel="nofollow">SI COVER COLLECTION</a></li>
<li><a href="http://sicustomerservice.com/" target="_blank" rel="nofollow">SI CUSTOMER SERVICE</a></li>
<li><a href="/2008/magazine/si.books/">SI BOOKS</a></li>
<li><a href="/about_us/feedback/">CONTACT US</a></li>
<li><a href="/services/rss/">ADD RSS HEADLINE</a></li>
</ul>
<div class="cnnClear"></div>
<div class="cnnCopyright">
<style>
.cnnFooterBox .cnnHolder { overflow:hidden; }
.cnnFooterBox .cnnRight DIV.cnnCopyright { line-height:16px;padding-top:2px;text-align:left; }
.cnnFooterBox .cnnRight DIV.cnnCopyright IMG { float:left;margin:0 6px 14px 0; }
.cnnFooterBox .cnnRight DIV.cnnCopyright IMG#cnnFooterAdOpt { float:none;margin:0 0 0 6px;vertical-align:bottom; }
</style>
<img src="http://i.cdn.turner.com//si/.element/img/4.1/global/logo_footer_turner.png" alt="Turner - SI Digital"/>
<script type="text/javascript">if( ( ( document.location.pathname ).indexOf( '/basketball/nba' ) >= 0 ) || ( ( document.location.pathname ).indexOf( '/video/nba' ) == 0 ) ) { document.write( 'TM & &#169; 2012 Turner Broadcasting System, Inc. A Time Warner Company. All Rights Reserved. SI.com is part of CNN Digital Network, which is part of the Turner Digital Network.' ); } else { document.write( 'TM & &#169; 2012 Turner Broadcasting System, Inc. A Time Warner Company. All Rights Reserved. SI.com is part of CNN Digital Network, which is part of the Turner Digital Network.' ); }</script><noscript>TM & &#169; 2012 Turner Broadcasting System, Inc. A Time Warner Company. All Rights Reserved. SI.com is part of CNN Digital Network, which is part of the Turner Digital Network.</noscript>
<br/> <a href="/interactive_legal.html" rel="nofollow">Terms</a> under which this service is provided to you. Read our <a href="/privacy/" rel="nofollow">privacy guidelines</a>, <a href="https://subscription.timeinc.com/storefront/privacy/si/generic_privacy_new.html?dnp-source=E#california" rel="nofollow">your California privacy rights</a>, and <a href="http://subscription-assets.timeinc.com/prod/assets/themes/magazines/default/template-resources/html/legal/ti-corp-behavioral.html">ad choices<img src="http://i.cdn.turner.com/si/.element/img/4.1/global/logo_adchoices.gif" id="cnnFooterAdOpt"/></a>.
</div>
</div>
<div class="cnnLeft"><a href="http://sportsillustrated.cnn.com/vault/cover/featured/11730/index.htm?xid=sivcoverhome"><img style="vertical-align:bottom;" title="SI Cover" alt="SI Cover" src="http://i.cdn.turner.com/si/si_online/covers/images/2012/0416_thumb.jpg"></a><a href="http://www.sportsillustratedeverywhere.com/?xid=sivcoverhome"><img style="vertical-align:bottom;" src="http://i.cdn.turner.com/sivault/.element/img/1.0/read_all_articles_96x12.gif" alt="Read All Articles" border="0" width="96" height="12"></a><a href="http://www.sicovers.com/ils.aspx?p=SPR20120416golf&utm_source=sivault&utm_medium=inet&utm_campain=icrefer &xid=sivcoverhome" target="_blank"><img style="vertical-align:bottom;" src="http://i.cdn.turner.com/sivault/.element/img/1.0/buy_cover_reprint.gif" alt="Buy Cover Reprint" border="0" width="96" height="12"></a>
</div>
</div>
</div>
<!-- end footerbox -->
<!-- start searchbar -->
<div class="cnnSearchFooter">
<div class="cnnCenter"><form method="get" action="http://sportsillustrated.cnn.com/search/" name="footer_search"><input id="searchInputFooter" type="text" name="text" class="cnnLeft"/><input type="image" src="http://i.cdn.turner.com/si/.element/img/4.1/global/search.gif" alt="Search" title="Search" class="cnnRight"/></form></div>
</div>
<!-- end searchbar -->
<!--START OF PAGELINKS.JS-->
<script language="Javascript">// Post Processing code to update links with tracking references
var url = window.location.href.toString();
url = url.replace(/http:\/\/[^\/]*/, '');
url = url.replace(/\?.*$/, '');
// All links on page
var links = document.getElementsByTagName('a');
for (var i=0; i < links.length; i++) {
var link = links[i];
if (link.href); else continue;
if (link.href.indexOf('.html/')>0) { siLog.debug('Fix trail slash - ',link.href); link.href = link.href.replace(/\.html\//,'.html'); }
if (!cnnPage.isHomepage) {
// Loop through links, add erefs where expected
if (link.href.indexOf('http://www.fannation.com/') == 0) {
cnnAddQ( link, 'eref=fromSI' );
}
if (url != '/' && link.href.indexOf('/vault') > 0) {
cnnAddQ( link, 'eref=sisf' );
}
if (url.indexOf('/danpatrick') != 0 && link.href.indexOf('/danpatrick') > 0 && link.href.indexOf('.mp3') < 0) {
cnnAddQ( link, 'eref=fromSI' );
}
}
if (link.innerHTML == link.getAttribute('title')) {
link.setAttribute('title','');
}
}
function cnnAddQ (link, add) {
if (link.href.toLowerCase().indexOf('javascript') == -1) {
if (link.href.indexOf('?') > 0) link.href = link.href + '&' + add;
else link.href = link.href + '?' + add;
}
}
// Add whitespace to cnnClear
var breaks = $c('cnnClear','div');
/* Homepage */
if (cnnPage.isHomepage) {
cnnTagHPLinks();
/* iPad */
if(navigator.userAgent.indexOf('iPad')>-1) {
$e('cnnShareRow_mobile').href='http://ax.itunes.apple.com/WebObjects/MZStore.'
+'woa/wa/browserRedirect?url=itms%253A%252F%252Fax.itunes.apple.com%252FWebObj'
+'ects%252FMZStore.woa%252Fwa%252FviewSoftware%253Fid%253D329510739%2526mt%253D8';
}
/* Poll frame height issue */
if ($e('cnnPollFrame')) { $e('cnnPollFrame').setAttribute('height','169'); }
}</script>
<!--END OF PAGELINKS.JS-->
</div>
<div><!-- move tracking out of cnnpage -->
<!-- ADBP/JSMD -->
<!-- ADBP Meta Data -->
<script type="text/javascript" src="http://i.cdn.turner.com/si/.e/js/4.1/global/jsmd/metadata.js"></script>
<!-- /ADBP Meta Data -->
<!-- JSMD Code -->
<script language="JavaScript" type="text/javascript" src="http://i.cdn.turner.com/si/.element/js/4.1/global/jsmd/jsmd.js"></script>
<script language="JavaScript">
<!-- $pathname is defined in metadata.js
if($pathname.indexOf("/.element/ssi/ads.iframes/") == -1 && $pathname.indexOf("/doubleclick/dartiframe.html") == -1) {
var jsmd=_jsmd.init();
if(document.referrer !== window.location.href){
jsmd.send();
}
}
//-->
</script>
<!-- / End JSMD Code -->
<!-- /ADBP/JSMD -->
</div>
<div style="font-size:1px;line-height:1px;">
<div><img src="/cookie.crumb" width="1" height="1"></div>
</div>
<img src="http://i.cdn.turner.com/si/.e/img/4.0/global/pixels/blank_pixel.gif" alt="" id="TargetImageDE" name="TargetImageDE" onload="cnnad_getDEAdHeadCookie(this)" height="1" width="1">
<script language="JavaScript">
siTracking.init();
</script>
<script language="JavaScript">
//ADM
cnnad_sendADMData();
cnnad_ugsync();
</script>
<!-- TIIAD -->
<script type="text/javascript">
function siQuantcast()
{
var lb = "Time Inc News Business and Sports,Sports Illustrated";
var lb_ch = (jsmd.get("m:page.section[0]") ? jsmd.get("m:page.section[0]") : "");
lb+=(lb_ch != null && typeof(lb_ch) == "string" && lb_ch.length > 0) ? "." + lb_ch:"";
return lb;
}
_qoptions={
qacct:"p-5dyPa639IrgIw",
labels:siQuantcast()
};
</script>
<script type="text/javascript" src="http://edge.quantserve.com/quant.js"></script>
<noscript><img src="http://pixel.quantserve.com/pixel/p-5dyPa639IrgIw.gif?labels=Time Inc News Business and Sports,Sports Illustrated" style="display: none;" border="0" height="1" width="1" alt="Quantcast"/></noscript>
<script src="http://js.revsci.net/gateway/gw.js?csid=H07710&auto=t" type="text/javascript"></script>
<!-- /TIIAD -->
<script src="http://i.cdn.turner.com/si/.e1d/js/4.1/global/pagelinks.js" type="text/javascript"></script>
<script src="http://i.cdn.turner.com/si/.e1d/js/4.1/global/subnav.js" type="text/javascript"></script>
<!-- end contentFooter -->
<!--[if IE 6]></div></div><![endif]--><!--[if IE 7]></div></div><![endif]--><!--[if IE 8]></div></div><![endif]-->
</body>
</html>

@ -0,0 +1,39 @@
import os
import unittest
from readability import Document
# Directory holding the sample documents, located next to this test module.
SAMPLES = os.path.join(os.path.dirname(__file__), 'samples')


def load_sample(filename):
    """Helper to get the content out of the sample files.

    :param filename: name of a file inside the ``SAMPLES`` directory.
    :returns: the entire content of that file, as returned by ``read()``.
    """
    # Read inside a ``with`` block so the handle is closed deterministically
    # instead of being left for the garbage collector to clean up.
    with open(os.path.join(SAMPLES, filename)) as sample_file:
        return sample_file.read()
class TestArticleOnly(unittest.TestCase):
    """Summaries can be requested with or without the html/body wrapper.

    By default ``Document.summary()`` hands back a complete html document.
    Callers that want to embed the processed article in their own view or
    application can instead ask for just the article divs.

    """

    def test_si_sample(self):
        """The default summary of the si sample is wrapped in html and body."""
        html = load_sample('si-game.sample.html')
        article = Document(
            html,
            url='http://sportsillustrated.cnn.com/baseball/mlb/gameflash/2012/04/16/40630_preview.html')
        summary = article.summary()
        expected_prefix = '<html><body><div><div class'
        # Only the leading markup is compared, not the whole article.
        self.assertEqual(expected_prefix, summary[:len(expected_prefix)])

    def test_si_sample_doc_only(self):
        """With document_only=True the si sample summary starts at the divs."""
        html = load_sample('si-game.sample.html')
        article = Document(html, url='http://sportsillustrated.cnn.com/baseball/mlb/gameflash/2012/04/16/40630_preview.html')
        summary = article.summary(document_only=True)
        expected_prefix = '<div><div class="'
        # No html/body wrapper is expected in front of the article divs.
        self.assertEqual(expected_prefix, summary[:len(expected_prefix)])
Loading…
Cancel
Save