You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
python-readability/src/tests/regression_test_data/mit-000-orig.html

246 lines
23 KiB
HTML

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US" lang="en-US" >
<head>
<base href="http://web.mit.edu/newsoffice/2011/compare-recommendation-systems-0708.html" />
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<meta name="robots" content="index,follow,noodp,noydir" />
<meta name="keywords" content="MIT,Massachusetts Institute of Technology,News Office,Computer science and technology,Electrical engineering and electronics,LIDS,Ranking algorithms,Rankings,Recommendation engines" />
<meta name="author" content="Larry Hardesty, MIT News Office" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta name="description" content="Researchers believe that comparing products, rather than rating them on an absolute scale, will lead to algorithms that better predict customers’ preferences." />
<title>Improving recommendation systems - MIT News Office</title>
<link rel="stylesheet" type="text/css" href="/newsoffice/plugins/system/compress/css.php?css=c02feab0fb9955599676411caef15f7d.css"/>
<script type="text/javascript" src="/newsoffice/plugins/system/compress/js.php?js=9e9ae0680c37e64c1e6523aab2eac8d3.js"></script>
<link href="http://web.mit.edu/newsoffice/2011/compare-recommendation-systems-0708.html" rel="canonical" />
<link href="/newsoffice/templates/mit/favicon.ico" rel="shortcut icon" type="image/x-icon" />
<script type="text/javascript">
// Global flag set to 1 before the comment widget markup appears.
// NOTE(review): presumably read by the JoomlaComment (JOSC) client script to control its CSS handling; confirm against the JOSC bundle.
var JOSC_postCSS=1;
</script>
<script type='text/javascript'>
// User-facing message strings for the comment widget, exposed as page-level globals.
// Declared in a single var statement, one message per line, so each string can be reviewed on its own.
var _JOOMLACOMMENT_MSG_DELETE = "Are you sure you want to delete this comment?",
    _JOOMLACOMMENT_MSG_DELETEALL = "Are you sure you want to delete all comments?",
    _JOOMLACOMMENT_WRITECOMMENT = "Write comment",
    _JOOMLACOMMENT_SENDFORM = "Send",
    _JOOMLACOMMENT_EDITCOMMENT = "Edit comment",
    _JOOMLACOMMENT_EDIT = "Edit",
    _JOOMLACOMMENT_FORMVALIDATE = "Please insert at least a comment.",
    _JOOMLACOMMENT_FORMVALIDATE_CAPTCHA = "Please input the anti-spam code that you can read in the image.",
    _JOOMLACOMMENT_FORMVALIDATE_CAPTCHA_FAILED = "Anti-spam code is not correct. Please input the code that you can read in the image.",
    _JOOMLACOMMENT_FORMVALIDATE_EMAIL = "To be notified, please enter your email",
    _JOOMLACOMMENT_ANONYMOUS = "Anonymous",
    _JOOMLACOMMENT_BEFORE_APPROVAL = "Your comment has been queued for moderation by site administrators and will be published after approval.",
    _JOOMLACOMMENT_REQUEST_ERROR = "Request failed",
    _JOOMLACOMMENT_MSG_NEEDREFRESH = "";
</script>
<meta property="og:type" content="article" />
<meta property="og:site_name" content="MIT's News Office" />
<meta property="og:url" content="http://web.mit.edu/newsoffice/2011/compare-recommendation-systems-0708.html" />
<meta property="og:title" content="Improving recommendation systems - MIT News Office" />
<meta property="og:author" content="Larry Hardesty, MIT News Office" />
<meta property="og:description" content="Researchers believe that comparing products, rather than rating them on an absolute scale, will lead to algorithms that better predict customers’ preferences." />
<meta property="og:image" content="http://web.mit.edu/newsoffice//images/article_images/tn/20110707114444-1.jpg" />
<link rel="image_src" href="http://web.mit.edu/newsoffice//images/article_images/tn/20110707114444-1.jpg" />
<meta property="fb:page_id" content="126533127390327" />
<!--[if IE]><link rel="stylesheet" href="/newsoffice/templates/mit/css/ie.css" type="text/css" /><![endif]-->
<!--[if IE 6]><link rel="stylesheet" href="/newsoffice/templates/mit/css/ie6.css" type="text/css" /><![endif]-->
<script type="text/javascript">
// Google Analytics bootstrap: queue the account id and a page-view command on _gaq,
// then inject the ga.js script element asynchronously.
var _gaq = _gaq || [];
_gaq.push(['_setAccount', 'UA-1592615-17']);
_gaq.push(['_trackPageview']);
(function() {
  // Choose the host prefix that matches the scheme the page itself was served over.
  var hostPrefix;
  if ('https:' == document.location.protocol) {
    hostPrefix = 'https://ssl';
  } else {
    hostPrefix = 'http://www';
  }

  var tracker = document.createElement('script');
  tracker.type = 'text/javascript';
  tracker.async = true;
  tracker.src = hostPrefix + '.google-analytics.com/ga.js';

  // Insert the new element immediately before the first script element in the document.
  var firstScript = document.getElementsByTagName('script')[0];
  firstScript.parentNode.insertBefore(tracker, firstScript);
})();
</script>
</head>
<body>
<div id="wrapper">
<div id="topbar"><a href="http://web.mit.edu">massachusetts institute of technology</a></div>
<div id="header">
<div class="logo">
<a href="/newsoffice" title="MIT News">
<img src="/newsoffice/templates/mit/images/newslogo.gif" alt="MIT News" width="278" height="48" />
</a>
</div>
<!-- Site search form: posts the "searchword" field together with the hidden task/option fields to index.php. -->
<div class="search"><form action="index.php" method="post">
<div class="searchSubmit">
<!-- The visible caption is tied to the text field so assistive technology announces it as the field's label. -->
<label for="mod_search_searchword">search</label><br/>
<input name="searchword" id="mod_search_searchword" maxlength="60" class="inputboxSubmit" type="text" size="19" value="" onblur="if(this.value=='') this.value='search...';" onfocus="if(this.value=='search...') this.value='';" /><input type="image" value="Search" class="buttonSubmit no_border" src="/newsoffice/images/M_images/searchButton.gif" alt="Search" onclick="this.form.searchword.focus();"/> </div>
<input type="hidden" name="task" value="search" />
<input type="hidden" name="option" value="com_search" />
</form></div>
<div class="clear"></div>
<div id="menu"><ul class="link"><li class="link-li"><a class="link-a ov red" href="/newsoffice/engineering.html">engineering</a></li><li class="link-li"><a class="link-a ov red" href="/newsoffice/science.html">science</a></li><li class="link-li"><a class="link-a ov red" href="/newsoffice/management.html">management</a></li><li class="link-li"><a class="link-a ov red" href="/newsoffice/architecture-and-planning.html">architecture + planning</a></li><li class="link-li"><a class="link-a ov red" href="/newsoffice/humanities-arts-and-social-sciences.html">humanities, arts, and social sciences</a></li><li class="link-li"><a class="link-a ov " href="/newsoffice/campus.html">campus</a></li><li class="link-li"><a class="link-a ov " href="/newsoffice/multimedia.html">multimedia</a></li><li class="link-li"><a class="link-a ov " href="/press/">press</a></li></ul></div>
</div>
<div id="sheadcol">
<h1>Improving recommendation systems</h1>
<div class="dek">Researchers believe that comparing products, rather than rating them on an absolute scale, will lead to algorithms that better predict customers’ preferences.</div>
<div class="byline">Larry Hardesty, MIT News Office<br /></div></div>
<div class="clear"></div>
<div id="scol1">
<div class="section">
<h2>today's news</h2><div class="story"><h3><a href="/newsoffice/2011/language-from-games-0712.html">Computer learns language by playing games</a></h3><div class="storypic"><a href="/newsoffice/2011/language-from-games-0712.html"><img src="http://img.mit.edu/newsoffice/images/article_images/tn/20110711172908-1.jpg" alt="" height="116" width="180" /></a><br /><div class="caption">"Civilization" is a strategy game in which players build empires by, among
other things, deciding where to found cities and deploy armies.
<br /><span class="credit">Image courtesy of Sid Meier's Civilization V</span></div></div><p>By basing its strategies on the text of a manual, a computer infers the meanings of words without human supervision. </p></div><h4><a href="/newsoffice/2011/humanizing-livers-0712.html">Recreating human livers, in mice</a></h4><div class="smdate">July 12, 2011</div><h4><a href="/newsoffice/2011/printable-solar-cells-0711.html">While you’re up, print me a solar cell</a></h4><div class="smdate">July 11, 2011</div></div>
<div id="primaryTagStories">
<h2 class="lowercase">similar stories</h2><h4><a href="/newsoffice/2010/drug-development-0413.html">Drug discovery, Netflix style?</a></h4><div class="smdate">April 13, 2010</div></div>
</div>
<div id="scol2">
<div id="main_container"><div class="storyfull">
<div class="lead">
<div class="storypic">
<img src="http://web.mit.edu/newsoffice//images/article_images/20110707114444-1.jpg" width="368" height="365" alt="" />
<div class="caption">
<span class="credit">Graphic: Christine Daniloff</span>
</div>
</div>
<div class="leadwithimage">
<div class="byline">
<div class="date">July 8, 2011</div>
</div>
<br />
<div class="storybuttons">
<div class="storybutton emailbutton">
<a href="/newsoffice/component/mailto/?articleId=16586&amp;tmpl=component&amp;link=aHR0cDovL3dlYi5taXQuZWR1L25ld3NvZmZpY2UvMjAxMS9jb21wYXJlLXJlY29tbWVuZGF0aW9uLXN5c3RlbXMtMDcwOC5odG1s" onclick="window.open(this.href,'win2','status=yes,toolbar=yes,scrollbars=yes,titlebar=yes,menubar=yes,resizable=yes,width=600,height=500,directories=yes,location=yes'); return false;">email</a>
</div>
<div class="storybutton commentbutton">
<a href="#JOSC_TOP">comment</a>
</div>
<div class="storybutton printbutton">
<a href="/newsoffice/2011/compare-recommendation-systems-0708.html?tmpl=component&amp;print=1" title="Print" onclick="window.open(this.href,'win2','status=yes,toolbar=yes,scrollbars=yes,titlebar=yes,menubar=yes,resizable=yes,width=640,height=480,directories=yes,location=yes'); return false;" rel="nofollow">print</a>
</div>
<div class="storybutton sharebutton">
<script type="text/javascript">
// Settings object for the AddThis sharing widget.
// NOTE(review): presumably consumed by addthis_widget.js, which is loaded later in this same container; confirm.
var addthis_config = {
  pubid: "xa-4a73301e0c1fbbc2",
  ui_508_compliant: true,
  ui_cobrand: "MIT's News Office",
  ui_delay: 250,
  data_ga_property: "UA-1592615-17",
  data_track_clickback: true,
  // Last property carries no trailing comma: ES3 engines (IE 7 and earlier) reject
  // a comma before the closing brace, which would abort this whole script block.
  services_exclude: "print, printfriendly"
};
// Share payload for the AddThis widget: the article URL plus URL-cleaning and shortening settings.
// NOTE(review): the bitly apiKey below is delivered to every visitor in page source; confirm that is intended.
var addthis_share = {
  url: "http://web.mit.edu/newsoffice/2011/compare-recommendation-systems-0708.html",
  url_transforms: {
    clean: true,
    shorten: {
      twitter: "bitly"
    }
  },
  shorteners: {
    bitly: {
      username: "mitnews",
      apiKey: "R_b159e47f15dfa915bd41e493ce415138"
    }
  }
};
</script>
<a href="http://www.addthis.com/bookmark.php?v=250"
class="addthis_button">share</a>
<script type="text/javascript" src="http://s7.addthis.com/js/250/addthis_widget.js"></script>
</div>
</div>
</div>
</div>
</div>
<div style="clear: left;"></div>
<div class="articlebody ">
Recommendation algorithms are a vital part of today’s Web, the basis of the targeted advertisements that account for most commercial sites’ revenues and of services such as Pandora, the Internet radio site that tailors song selections to listeners’ declared preferences. The DVD rental site Netflix deemed its recommendation algorithms important enough that it offered a million-dollar prize to anyone who could improve their predictions by 10 percent.<br /><br />But Devavrat Shah, the Jamieson Career Development Associate Professor of Electrical Engineering and Computer Science in MIT’s Laboratory of Information and Decisions Systems, thinks that the most common approach to recommendation systems is fundamentally flawed. Shah believes that, instead of asking users to rate products on, say, a five-star scale, as Netflix and Amazon do, recommendation systems should ask users to compare products in pairs. Stitching the pairwise rankings into a master list, Shah argues, will offer a more accurate representation of consumers’ preferences.<br /><br />In a series of papers (<a href="http://web.mit.edu/devavrat/www/nips2008.pdf" target="_blank">paper 1</a> | <a href="http://arxiv.org/abs/0910.0895" target="_blank">paper 2</a> | <a href="http://arxiv.org/abs/0910.0063" target="_blank">paper 3</a>) published over the last few years, Shah, his students Ammar Ammar and Srikanth Jagabathula, and Vivek Farias, an associate professor at the MIT Sloan School of Management, have demonstrated algorithms that put that theory into practice. Besides showing how the algorithms can tailor product recommendations to customers, they’ve also built a <a href="http://celect.lids.mit.edu/" target="_blank">website</a> that uses the algorithms to help large groups make collective decisions. 
And at an Institute for Operations Research and Management Sciences conference in June, they presented a version of their algorithm that had been tested on detailed data about car sales collected over the span of a year by auto dealers around the country. Their algorithm predicted car buyers’ preferences with 20 percent greater accuracy than existing algorithms.<br /><br /><strong>Calibration conundrum</strong><br /><br />One of the problems with basing recommendations on ratings, Shah explains, is that an individual’s rating scale will tend to fluctuate. “If my mood is bad today, I might give four stars, but tomorrow I’d give five stars,” he says. “But if you ask me to compare two movies, most likely I will remain true to that for a while.” <br /><br />Similarly, ratings scales may vary between people. “Your three stars might be my five stars, or vice versa,” Shah says. “For that reason, I strongly believe that comparison is the right way to capture this.”<br /><br />Moreover, Shah explains, anyone who walks into a store and selects one product from among the three displayed on a shelf is making an implicit comparison. So in many contexts, comparison data is actually easier to come by than ratings.<br /><br />Shah believes that the advantages of using comparison as the basis for recommendation systems are obvious but that the computational complexity of the approach has prevented its wide adoption. The results of thousands — or millions — of pairwise comparisons could, of course, be contradictory: Some people may like "Citizen Kane" better than "The Godfather," but others may like "The Godfather" better than "Citizen Kane." The only sensible way to interpret conflicting comparisons is statistically. But there are more than three million ways to order a ranking of only 10 movies, and every one of them may have some probability, no matter how slight, of representing the ideal ordering of at least one ranker. 
Increase the number of movies to 20, and there are more ways to order the list than there are atoms in the universe.<br /><br /><strong>Ordering out</strong><br /><br />So Shah and his colleagues make some assumptions that drastically reduce the number of possible orderings they have to consider. The first is simply to throw out the outliers. For example, Netflix’s movie-rental data assigns the Robin Williams vehicle "Patch Adams" the worst reviews, on average, of any film with a statistically significant number of ratings. So the MIT algorithm would simply disregard all the possible orderings in which "Patch Adams" ranked highly.<br /><br />Even with the outliers eliminated, however, a large number of plausible orderings might remain. From that group, the MIT algorithm selects a subset: the smallest group of orderings that fit the available data. This approach can winnow an astronomically large number of orderings down to one that’s within the computational purview of a modern computer.<br /><br />Finally, when the algorithm has arrived at a reduced number of orderings, it uses a movie’s rank in each of the orderings, combined with the probability of that ordering, to assign the movie an overall score. Those scores determine the final ordering.<br /><br />Paat Rusmevichientong, an associate professor of information and operations management at the University of Southern California, thinks that the most interesting aspect of Shah’s work is the alternative it provides to so-called parametric models, which are more restrictive. These, he says, were “the state of the art up until 2008, when Professor Shah’s paper first came out.” <br /><br />“They’ve really, substantially enlarged the class of choice models that you can work with,” Rusmevichientong says. “Before, people never thought that it was possible to have rich, complex choice models like this.”<br /><br />The next step, Rusmevichientong says, is to test that type of model selection against real-world data. 
The analysis of car sales is an early example of that kind of testing, and the MIT researchers are currently working up a version of their conference paper for journal publication. “I’ve been waiting to see the paper,” Rusmevichientong says. “That sounds really exciting.”<br /><br /><!-- START of joscomment --><div id='comment'>
<a name='JOSC_TOP'></a>
<table align='center' class='sectiontableheader' id='CommentMenu' width='100%' cellpadding='0' cellspacing='0' border='0'>
<tr>
<td align='left' class='label'>Comments</td>
<td align='right' class='buttons'>
<table cellpadding='0' cellspacing='0' border='0'>
<tr>
<td class='button'>
</td>
</tr>
</table>
</td>
</tr>
</table>
<!-- Comment-entry area: a login prompt followed by the comment form, whose button table is hidden with display:none. -->
<!-- NOTE(review): action='PHP_SELF' and value='{_SENDFORM}' look like unexpanded template placeholders; confirm against the comment component's template. -->
<div id='JOSC_formpos'>
<div class="onlyregistered"><a href="/newsoffice/login.html?articleId=16586&amp;articleItemid=89">Log in to write comments</a></div><form name='joomlacommentform' method='post' action='PHP_SELF'><input type='hidden' name='content_id' value='16586' /><input type='hidden' name='component' value='' /><input type='hidden' name='joscsectionid' value='1' /><table class='buttoncontainer' style='display:none;' cellpadding='0' cellspacing='0'><tr><td><input type='button' class='button' name='bsend' value='{_SENDFORM}' onclick='JOSC_editPost(-1,-1)' /></td><td id='JOSC_busy'></td></tr></table></form>
</div>
<table align='center' id='CommentPageNav' width='100%' cellpadding='0' cellspacing='0' border='0'>
<tr><td>
<div id="joscPageNav"></div>
</td><td id='JOSC_busypage'></td></tr>
</table>
<div id='Comments'></div>
</div>
<script type='text/javascript'>
// Runtime configuration globals for the comment widget (JOSC).
var JOSC_ajaxEnabled = 1;
// JOSC_http is not declared anywhere in this page; it has to come from an external script.
// The typeof guard turns a missing declaration into "AJAX disabled" instead of a
// ReferenceError that would skip every assignment below.
if (typeof JOSC_http === 'undefined' || !JOSC_http) JOSC_ajaxEnabled = false;
var JOSC_sortDownward = '0';
var JOSC_captchaEnabled = true;
var JOSC_template = 'http://web.mit.edu/newsoffice/components/com_comment/joscomment/templates/SSlide-emotop';
var JOSC_liveSite = 'http://web.mit.edu/newsoffice/components/com_comment/joscomment';
var JOSC_ConfigLiveSite = 'http://web.mit.edu/newsoffice/';
var JOSC_linkToContent = 'http://web.mit.edu/newsoffice/2011/compare-recommendation-systems-0708.html';
var JOSC_autopublish = '0';
</script>
<!-- END of joscomment --></div>
</div>
</div>
<div id="scol3"><div class="section_separator"></div><div id="top_right_line"></div><div class="section">
<h2>related</h2>
<div><h4><a href="http://web.mit.edu/devavrat/www/" target="_blank">Devavrat Shah</a></h4></div>
<div><h4><a href="http://web.mit.edu/orc/www/index.html" target="_blank">Operations Research Center</a></h4></div>
<div><h4><a href="http://lids.mit.edu" target="_blank">Laboratory of Information and Decisions Systems</a></h4></div>
</div>
<h2>tags</h2>
<div class="labels-label-list">
<h4 class="tagsH4"><a class="label" href="/newsoffice/topic/computers.html" title="Computer science and technology">Computer science and technology</a></h4><h4 class="tagsH4"><a class="label" href="/newsoffice/topic/electrical-engineering.html" title="Electrical engineering and electronics">Electrical engineering and electronics</a></h4><h4 class="tagsH4"><a class="label" href="/newsoffice/topic/lids.html" title="LIDS">LIDS</a></h4><h4 class="tagsH4"><a class="label" href="/newsoffice/topic/ranking-algorithms.html" title="Ranking algorithms">Ranking algorithms</a></h4><h4 class="tagsH4"><a class="label" href="/newsoffice/topic/rankings.html" title="Rankings">Rankings</a></h4><h4 class="tagsH4"><a class="label" href="/newsoffice/topic/recommendation-engines.html" title="Recommendation engines">Recommendation engines</a></h4>
</div></div>
<div class="clear"></div>
<div id="footer">
<!-- Site footer: MIT logo link, giving link, contact details marked up with schema.org microdata, and utility links. -->
<a href="http://web.mit.edu" title="Massachusetts Institute of Technology"><img src="/newsoffice/templates/mit/images/footer-logo.gif" alt="Massachusetts Institute of Technology" align="left" width="80" height="44" /></a>
<!-- target uses the "_blank" keyword; a bare "blank" names one reusable window instead of opening a new one each time. -->
<a href="http://giving.mit.edu/" onclick="_gaq.push(['_trackEvent', 'Link', 'MIT', 'http://giving.mit.edu/']);" target="_blank"><img src="http://web.mit.edu/newsoffice/templates/mit/images/GiveButton.png" alt="Give to MIT" align="right" width="109" height="25" /></a>
<!-- The PostalAddress item spans street, locality, region, and postal code so those properties attach to the address item rather than to LocalBusiness. -->
<span itemscope itemtype="http://schema.org/LocalBusiness"><span itemprop="name">MIT news</span>&nbsp;&nbsp;|&nbsp;&nbsp;<span itemprop="address" itemscope itemtype="http://schema.org/PostalAddress"><span itemprop="streetAddress">77 Massachusetts Avenue</span>, Room 11-400&nbsp;&nbsp;|&nbsp;&nbsp;<span itemprop="addressLocality">Cambridge</span>, <span itemprop="addressRegion">MA</span> <span itemprop="postalCode">02139-4307</span></span>&nbsp;&nbsp;|&nbsp;&nbsp;<span itemprop="telephone">617.253.2700</span>&nbsp;&nbsp;|&nbsp;&nbsp;TTY 617.258.9344</span>
<br />
<a href="http://twitter.com/MITNews" class="footer_link" target="_blank" title="Follow Us on Twitter" onclick="_gaq.push(['_trackEvent', 'Link', 'MIT', 'http://twitter.com/mitnews']);">twitter</a>&nbsp;|&nbsp;<a href="/newsoffice/rss.html" class="footer_link" title="rss">rss</a>&nbsp;|&nbsp;<a href="/newsoffice/contact.html" class="footer_link" title="contact">contact</a>&nbsp;|&nbsp;<a href="/newsoffice/about.html" class="footer_link" title="about mit news office">about the mit news office</a>&nbsp;|&nbsp;<a href="/newsoffice/terms-of-use.html" class="footer_link" title="terms of use">terms of use</a>&nbsp;|&nbsp;<a href="/newsoffice/news-office-comments.html" title="comment on this site" class="footer_link">comments</a>&nbsp;|&nbsp;<span><a href="http://web.mit.edu" class="footer_link boldText" title="Massachusetts Institute of Technology">Massachusetts Institute of Technology</a></span>
</div>
</div>
<!-- SiteCatalyst code version: H.23.3.
Copyright 1996-2011 Adobe, Inc. All Rights Reserved
More info available at http://www.omniture.com -->
<script language="JavaScript" type="text/javascript" src="/newsoffice/templates/mit/js/s_code.js"></script>
<script language="JavaScript" type="text/javascript"><!--
// Per-page SiteCatalyst variables: pageName and pageType are left empty, channel is "engineering".
// NOTE(review): `s` is not defined in this page; presumably created by s_code.js included just above. Confirm.
s.pageName=""
s.channel="engineering"
s.pageType=""
/************* DO NOT ALTER ANYTHING BELOW THIS LINE ! **************/
// Calls s.t() and, when it returns a truthy value, writes that value into the document.
var s_code=s.t();if(s_code)document.write(s_code)//--></script>
<script language="JavaScript" type="text/javascript"><!--
if(navigator.appVersion.indexOf('MSIE')>=0)document.write(unescape('%3C')+'\!-'+'-')
//--></script><noscript><img src="http://mitnewsoffice.122.2o7.net/b/ss/mitnews/1/H.23.3--NS/0"
height="1" width="1" border="0" alt="" /></noscript><!--/DO NOT REMOVE/-->
<!-- End SiteCatalyst code version: H.23.3. -->
<script type="text/javascript">
// Deferred loader: on a 1 ms timer, builds a script element for the CloudFront-hosted file
// (same scheme as the current page) and inserts it before the first script element.
setTimeout(function () {
  var injected = document.createElement("script");
  var anchor = document.getElementsByTagName('script')[0];

  injected.src = document.location.protocol + "//dnn506yrbagrg.cloudfront.net/pages/scripts/0011/6778.js";
  injected.async = true;
  injected.type = "text/javascript";

  anchor.parentNode.insertBefore(injected, anchor);
}, 1);
</script>
</body>
</html>