View Single Post
Old 03-13-2010, 01:06 PM   #1588
Ekips
Member
Ekips began at the beginning.
 
Posts: 15
Karma: 10
Join Date: Mar 2010
Device: PW2, K3gb(x2), K3w, K4, k5(x3) PRS-505s, Stanza for ipod


I'm still trying to stumble my way through a custom script to fetch the news from The Sun's website. I've sorted it so that it swaps each web page for its print page and leaves out the slideshows, but it still fetches all sorts of rubbish after the story, including all the 'connect to us' stuff.

How would I just get the headline and the main body of the article?

Here's the source code of a basic print page from thesun.co.uk:

Code:
<script language="JavaScript" type="text/javascript"> 
<!--
var s_account="newsintthesunprod,newsintsunnetworkprod,newsintniglobalprod";
//-->
</script>
<script type="text/javascript" src="/js/s_codeFULLSOL.js"></script>
<script type="text/javascript">
// Registry of tracking events: _hbE holds every event record created so far,
// and _hbEC is the index of the next free slot in it.
var _hbEC = 0;
var _hbE = [];

// Creates a new event record, stores it in the next free slot of _hbE and
// returns it.
//   a - event name, stored on the record as _N
//   b - accepted for signature compatibility; any value passed in is ignored
// The record's _C field always starts at 0.
function _hbEvent(a, b) {
    var record = {};
    _hbE[_hbEC] = record;
    _hbEC += 1;
    record._N = a;
    record._C = 0;
    return record;
}

// The page-view ("pv") event; the rest of the page configures it through hbx.
var hbx = _hbEvent("pv");
hbx.vpc = "HBX0100u";
hbx.gn = "ngd.thesun.co.uk";
// Extracts two values from the URL fragment (the part after "#"), which is
// expected to look like "#name1=value1&name2=value2". The values are used
// further down as hbx.cmp (campaign id) and hbx.hra (response attribute).
// Both stay "" unless the URL contains exactly one "#" and the fragment
// contains exactly two "&"-separated pairs.
var urlReturnCid = "";
var urlReturnAttr = "";
// Read the full address of the current page
var url = window.location.href;
// Split the address at "#": [before the fragment, fragment]
var qparts = url.split("#");
// Proceed only when there is exactly one "#", i.e. a single fragment
// (note: this is the fragment, not the "?" query string)
if (qparts.length == 2) {
// The fragment text, without the leading "#"
var query = qparts[1];
// Split the fragment into its "&"-separated pairs
var varQ = query.split("&");
// Exactly two pairs are required; any other count is ignored
if (varQ.length == 2) {
// Split each pair at "=" and keep the value part. The names are never
// checked, only the position matters; a pair without "=" yields undefined.
var retQ1 = varQ[0].split("=");
var retQ2 = varQ[1].split("=");
urlReturnCid = retQ1[1];
urlReturnAttr = retQ2[1];
}
}
//BEGIN EDITABLE SECTION
// Per-page tracking configuration. Every statement here is a plain property
// assignment on one of two objects:
//   hbx - the "pv" event record created earlier in this script by _hbEvent
//   s   - not declared anywhere in this page source; presumably defined by
//         /js/s_codeFULLSOL.js, which is loaded before this script
//         (NOTE(review): confirm)
//CONFIGURATION VARIABLES
// Page name: the headline with "+" between words, then the type code and the
// article id. The "br" inside "tauntingbrtragic" lines up with the <br/> in
// the page headline, so it looks like a leftover from tag stripping.
hbx.pn="Yobs+on+film+tauntingbrtragic+neighbour+PRTF-2891313";
s.events="";
// The same article id (2891313) and type code (PRTF) recur in s.pageName,
// s.prop5, s.prop6 and s.prop16 below.
s.pageName="SOL_PRTF_2891313 /News";
s.channel="/Home/News";
s.prop1="Home";
s.prop2="/Home/News";
s.prop3="/Home/News";
s.prop4="SOL";
s.prop5="PRTF";
s.prop6="Yobs on film tauntingbrtragic neighbour_PRTF";
s.prop15="";
s.prop16="2891313";
s.prop19="";
s.prop20="";	
s.prop25="";
s.campaign="";
s.hier2="/Home/News";
s.eVar15="";
hbx.mlc = "/Home/News"; 
hbx.acct = "DM5403272HDE;DM5406146PDA";
hbx.pndef="title";//DEFAULT PAGE NAME
hbx.ctdef="full";//DEFAULT CONTENT CATEGORY
//OPTIONAL PAGE VARIABLES
//ACTION SETTINGS
hbx.fv="";//FORM VALIDATION MINIMUM ELEMENTS OR SUBMIT FUNCTION NAME
hbx.lt="manual";//LINK TRACKING
hbx.dlf="n";//DOWNLOAD FILTER
hbx.dft="n";//DOWNLOAD FILE NAMING
hbx.elf="n";//EXIT LINK FILTER
//SEGMENTS AND FUNNELS
hbx.seg="";//VISITOR SEGMENTATION
hbx.fnl="";//FUNNELS
//CAMPAIGNS
// urlReturnCid is the first value parsed from the URL fragment earlier in
// this script; it is "" when the fragment is missing or malformed.
hbx.cmp=urlReturnCid;//CAMPAIGN ID
hbx.cmpn="";//CAMPAIGN ID IN QUERY
hbx.dcmp="";//DYNAMIC CAMPAIGN ID
hbx.dcmpn="";//DYNAMIC CAMPAIGN ID IN QUERY
hbx.dcmpe="";//DYNAMIC CAMPAIGN EXPIRATION
hbx.dcmpre="";//DYNAMIC CAMPAIGN RESPONSE EXPIRATION
// urlReturnAttr is the second value parsed from the URL fragment.
hbx.hra=urlReturnAttr;//RESPONSE ATTRIBUTE
hbx.hqsr="";//RESPONSE ATTRIBUTE IN REFERRAL QUERY
hbx.hqsp="";//RESPONSE ATTRIBUTE IN QUERY
hbx.hlt="";//LEAD TRACKING
hbx.hla="";//LEAD ATTRIBUTE
hbx.gp="";//CAMPAIGN GOAL
hbx.gpn="";//CAMPAIGN GOAL IN QUERY
hbx.hcn="";//CONVERSION ATTRIBUTE
hbx.hcv="";//CONVERSION VALUE
// Note: this is the four-character string "null", not the null value.
hbx.cp="null";//LEGACY CAMPAIGN
hbx.cpd="";//CAMPAIGN DOMAIN
//CUSTOM VARIABLES
hbx.ci="";//CUSTOMER ID
hbx.hc1="";//CUSTOM 1
hbx.hc2="";//CUSTOM 2
hbx.hc3="";//CUSTOM 3
hbx.hc4="";//CUSTOM 4
hbx.hrf="";//CUSTOM REFERRER
hbx.pec="";//ERROR CODES
//INSERT CUSTOM EVENTS
//END EDITABLE SECTION
</script>
<script language="JavaScript" type="text/javascript"><!--
/************* DO NOT ALTER ANYTHING BELOW THIS LINE ! **************/
var s_code=s.t();if(s_code)document.write(s_code)//--></script>
<script language="JavaScript" type="text/javascript"><!--
if(navigator.appVersion.indexOf('MSIE')>=0)document.write(unescape('%3C')+'\!-'+'-')
//--></script><!--/DO NOT REMOVE/-->
<!-- End SiteCatalyst code version: H.20.3. -->
</script><script src="/js/hbx.js" type="text/javascript"></script>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>

<meta http-equiv="X-UA-Compatible" content="IE=EmulateIE7"/>
<meta http-equiv="Content-Type" content="text/html;charset=ISO-8859-1" />
<title>Print Friendly Page</title>
<link rel="shortcut icon" href="favicon.ico" type="image/x-icon" />
<script src="/js/jquery.js" type="text/javascript"></script>
<script src="/js/cufon/cufon-yui.js" type="text/javascript"></script>
<script src="/js/cufon/cufon-font-thc.js" type="text/javascript"></script>
<script src="/js/sol.js" type="text/javascript"></script>
<style type="text/css" media="screen">
@import "/css/sol.css";
html {
overflow-x:hidden;
overflow-y:scroll;
}
</style>
<style type="text/css" media="print">
@import "/css/sol-print.css";
</style>

</head>
<body class="print-friendly">	
<div id="content-print">
<div id="column-print" class="bg-fff">
<BEAN:define id="publication" name="publication" type="neo.xredsys.api.Publication" />
<div class="clear width-625 bg-fff padding-top-10">
<div class="print-friendly-hidden text-center padding-bottom-5">
<a href="#" onclick="window.print();return false;" name="&lid=PrintImage&lpos=Print"><img src="/img/buttons/btn-print.gif" alt="Print" /></a></div>	
<h2 class="bg-c00 text-fff margin-bottom-5"><img src="/img/text/print-logo-the-sun.gif" alt="Print" width="87" height="32" /></h2>
<div class="clear bg-fff margin-bottom-10">
<div class="padding-bottom-2 black-solid-line"></div>
<h2 class="text-uppercase padding-left-2">News</h2>
<div class="padding-bottom-5 black-solid-line"></div>
</div>
</div>
<div class="width-682">

<div class="padding-bottom-5">
<div class="clear padding-top-10"></div>
<h1 class="medium-centered">
Yobs on film taunting<br/>tragic neighbour
</h1>
</div>
<div class="text-center">
<div id="ltbx100" class="ltbx-slideshow">
<div class="ltbx-loader" style="width:682px;">
<div class="ltbx-img">
<img src="http://img.thesun.co.uk/multimedia/archive/01003/David-Askew_1__682_1003992a.jpg" style="width:682px;height:400px;" alt="Loading"/>
</div>
<div class="ltbx-load-layer" style="margin-top:-400px;width:682px;height:400px;">
<img style="margin-top:184.0px;" src="/img/lightbox/loading.gif" alt="Loading Animation"/>
</div>
<div class="ltbx-label" style="width:682px;">

Torment ... David Askew confronted by yobs in his garden 
</div>
</div>
<div id="k100r1c1t5w682h400" class="ltbx-gallery">
<p class="ltbx-var ltbx-hbxpn">ltbx2890110</p>
<p class="ltbx-var ltbx-gap-height">40</p>
<p class="ltbx-var ltbx-nav-loop">1</p>
<p class="ltbx-var ltbx-bk-pad">0</p>
<p class="ltbx-var ltbx-url">/sol/</p>
<p class="ltbx-var ltbx-logo">1</p>
<div class="ltbx-container">

<div class="ltbx-scroller">
<div class="ltbx-group">
<div class="ltbx-block">
<a title="Torment ... David Askew confronted by yobs in his garden" href="http://img.thesun.co.uk/multimedia/archive/01003/David-Askew_1__682_1003992a.jpg" class="ltbx-img" style="width:682px;height:400px;">
<img src="http://img.thesun.co.uk/multimedia/archive/01003/David-Askew_1__682_1003992a.jpg" alt="Torment ... David Askew confronted by yobs in his garden"/>
<div class="ltbx-msg">
<div class="ltbx-tab"></div><div class="icon-slideshow ltbx-icon">Slideshows	</div>
</div>
</a>
<div class="ltbx-label" style="width:682px;">
Torment ... David Askew confronted by yobs in his garden 
</div>
</div>
<div class="ltbx-block">
<a title="Protection ... CCTV had been installed out the back of Askew's house" href="http://img.thesun.co.uk/multimedia/archive/01003/David-Askew_2__1003990a.jpg" class="ltbx-img" style="width:682px;height:400px;">
<img src="http://img.thesun.co.uk/multimedia/archive/01003/David-Askew_2__1003990a.jpg" alt="Protection ... CCTV had been installed out the back of Askew's house"/>

<div class="ltbx-msg">
<div class="ltbx-tab"></div><div class="icon-slideshow ltbx-icon">Slideshows	</div>
</div>
</a>
<div class="ltbx-label" style="width:682px;">
Protection ... CCTV had been installed out the back of Askew's house 
</div>
</div>
<div class="ltbx-block">
<a title="Tragic ... shots show Askew clearly being taunted by yob" href="http://img.thesun.co.uk/multimedia/archive/01003/David-Askew_3__1003991a.jpg" class="ltbx-img" style="width:682px;height:400px;">
<img src="http://img.thesun.co.uk/multimedia/archive/01003/David-Askew_3__1003991a.jpg" alt="Tragic ... shots show Askew clearly being abused by yobTragic ... shots show Askew clearly being abused by yob"/>
<div class="ltbx-msg">
<div class="ltbx-tab"></div><div class="icon-slideshow ltbx-icon">Slideshows	</div>
</div>
</a>

<div class="ltbx-label" style="width:682px;">
Tragic ... shots show Askew clearly being taunted by yob 
</div>
</div>
<div class="clr"></div>
</div>
</div>
</div>
<div class="ltbx-nav">
<div class="ltbx-lft"></div>
<div class="ltbx-pager"></div>
<div class="ltbx-rgt"></div>
</div>
</div>
<img class="ltbx-load-init" src="/img/global/spacer.gif" alt="spacer" onload="jCsl.init({id:100,on:true});" /> 
</div>
</div>

<div class="clear-left">
<div>
<div class="clear-left">
<p class="display-byline">
By STEWART WHITTINGHAM
</p>
<div class="padding-top-10 padding-bottom-10 clear-left">
<div class="center-div width-280">
<p class="display-byline">Published: Today</p>
</div>
</div>
<div class="clear-left"></div>
</div>
<div class="clear-left padding-bottom-7"></div>
</div>
<h2 class="padding-bottom-7" style="font-size: 1.05em; line-height: 1.05em;">

SHOCKING footage of the bullying that drove a man to his death emerged 
yesterday.
</h2><p class="article"></p><p class="article"><div style="width:180px" class="margin-top-5 margin-right-10 padding-bottom-5 float-left"><img src="http://img.thesun.co.uk/multimedia/archive/01003/SNN1211_2__1003560a.jpg" border="0" alt="david askew" title="david askew" /><div class="img-cap">Hounded ... David the 'gent'</div></div><p class="article">
It captured tragic David Askew crying in anguish as yobs tormented him.
</p><p class="article"></p><p class="article">
Their victim, who had learning difficulties, collapsed and died outside his 
home this week when his gate was smashed after 20 years' intimidation by the 
Mad Dogs Gang - some aged just six.
</p><p class="article"></p><p class="article">
He was pelted with bricks and hounded for cash and cigs by the thugs on his 
estate in Hattersley, Greater Manchester.
</p><p class="article">
Neighbour Lynne Barker, 47, who filmed his ordeal seven years ago, said: 
"David suffered hell because of these kids.
</p><p class="article"></p><p class="article">
"They threw stones, abused him and threatened him as he had a mental age of 
eight and was an easy target." 
</p><p class="article">
Her film also shows David, 64, who had CCTV installed, biting his hand in 
frustration then trying to escape the gang.
</p><p class="article">
One hoodie said yesterday: "We all did it. But I know now he must have been 
scared."
</p><p class="article">

Kial Cottingham, 18, is due in court today after being charged with 
harassment. 
</p><p class="article">
A lad of 18 arrested on suspicion of manslaughter on Thursday has been bailed 
until June 7
</p><p class="article"></p><p class="article"></p><p class="article"></p><p class="article">
<object width="640" height="385"><param name="movie" 
value="http://www.youtube.com/v/06ElR5Ydetk&hl=en_GB&fs=1&"></param><param 
name="allowFullScreen" value="true"></param><param 
name="allowscriptaccess" value="always"></param><embed 
src="http://www.youtube.com/v/06ElR5Ydetk&hl=en_GB&fs=1&" 
type="application/x-shockwave-flash" allowscriptaccess="always" 
allowfullscreen="true" width="640" height="385"></embed></object>
</p><p class="article"></p><p class="article"></p><p class="article" align="right"><a href="mailto: s.whittingham@the-sun.co.uk"target="_self" title="s.whittingham@the-sun.co.uk">s.whittingham@the-sun.co.uk</a></p> 
<div class="float-left width-300 padding-right-10 padding-bottom-10 padding-top-10 ">
<!-- null -->
</div>
<!-- Article End -->
<div class="float-left" id="chicklets-panel">
<style>
#column2 {overflow: hidden;}
</style>
<img src="http://img.thesun.co.uk/multimedia/archive/00429/spacer_429055a.gif" width="389" height="1" />
<script type="text/javascript" language="JavaScript">
<!--
var showurl;
// Returns the address of the current page (location.href), percent-encoded so
// that it can be appended as a single query-string value to the share links
// built by the functions that follow. Takes no arguments; always returns a
// string ("" for an empty address).
//
// encodeURIComponent is used instead of a character-by-character escape()
// loop for three reasons:
//  - escape() leaves "+" unencoded, and receiving sites read "+" as a space;
//  - escape() emits non-standard "%uXXXX" sequences for characters above
//    U+00FF, whereas encodeURIComponent produces UTF-8 percent-encoding;
//  - no loop means no undeclared counter leaking into the global scope and
//    no undefined result when the address is empty.
function urlencode() {
    return encodeURIComponent(location.href);
}
// Opens Digg's submit form for the current page in a pop-up window named
// "digg". The encoded page address is also stored in the global showurl.
function digg() {
    showurl = urlencode();
    // The separator must be a bare "&". Character references such as "&amp;"
    // are not decoded inside an HTML script element, so writing the entity
    // here would send a parameter literally named "amp;url".
    window.open("http://digg.com/submit?phase=2&url=" + showurl, "digg", "width=470,height=452,status=1,toolbar=1,location=1,scrollbars=1,menubar=1,resizable=1");
}
// Opens the del.icio.us "post" form for the current page in a pop-up window.
// The encoded page address is also stored in the global showurl. The pop-up
// reuses the window name "digg", so it shares a window with the other share
// links that use that name.
function delicious() {
    var popupFeatures = "width=470,height=452,status=1,toolbar=1,location=1,scrollbars=1,menubar=1,resizable=1";
    showurl = urlencode();
    var target = "http://del.icio.us/post?url=" + showurl;
    window.open(target, "digg", popupFeatures);
}
// Opens Reddit's submit form for the current page in a pop-up window.
// The encoded page address is also stored in the global showurl. The pop-up
// reuses the window name "digg", so it shares a window with the other share
// links that use that name.
function reddit() {
    var popupFeatures = "width=470,height=452,status=1,toolbar=1,location=1,scrollbars=1,menubar=1,resizable=1";
    showurl = urlencode();
    var target = "http://reddit.com/submit?url=" + showurl;
    window.open(target, "digg", popupFeatures);
}
// Opens Newsvine's "seed" form for the current page in a pop-up window.
// The encoded page address is also stored in the global showurl. The pop-up
// reuses the window name "digg", so it shares a window with the other share
// links that use that name.
function newsvine() {
    showurl = urlencode();
    // The path contains a bare "&" ("seed&save"). Character references such
    // as "&amp;" are not decoded inside an HTML script element, so writing
    // the entity here would request the path "seed&amp;save" instead.
    window.open("http://www.newsvine.com/_tools/seed&save?u=" + showurl, "digg", "width=470,height=452,status=1,toolbar=1,location=1,scrollbars=1,menubar=1,resizable=1");
}
// Opens NowPublic for the current page in a pop-up window. The encoded page
// address is also stored in the global showurl. The pop-up reuses the window
// name "digg", so it shares a window with the other share links that use
// that name.
function nowpublic() {
    var popupFeatures = "width=470,height=452,status=1,toolbar=1,location=1,scrollbars=1,menubar=1,resizable=1";
    showurl = urlencode();
    var target = "http://view.nowpublic.com/?src=" + showurl;
    window.open(target, "digg", popupFeatures);
}
// Opens Facebook's share dialog for the current page in a pop-up window
// named "facebook". The encoded page address is also stored in the global
// showurl.
function facebook() {
    var popupFeatures = "width=470,height=452,status=1,toolbar=1,location=1,scrollbars=1,menubar=1,resizable=1";
    showurl = urlencode();
    var target = "http://www.facebook.com/share.php?u=" + showurl;
    window.open(target, "facebook", popupFeatures);
}
// Opens Fark's submission form in a pop-up window named "fark", passing the
// page title as "h" and the page address as "u". The encoded page address is
// also stored in the global showurl.
function fark() {
    showurl = urlencode();
    // The title is free text, so it is encoded like the address; a raw "&",
    // "#" or "=" in it would otherwise cut the value short or add parameters.
    var title = encodeURIComponent(document.title);
    // The separator must be a bare "&". Character references such as "&amp;"
    // are not decoded inside an HTML script element, so writing the entity
    // here would send a parameter literally named "amp;u".
    window.open("http://cgi.fark.com/cgi/fark/farkit.pl?h=" + title + "&u=" + showurl, "fark", "width=470,height=452,status=1,toolbar=1,location=1,scrollbars=1,menubar=1,resizable=1");
}
// Opens MySpace's "post to" page in a new window (no window name or feature
// string is passed), with the page address as "u" and the page title as both
// "t" and "c". The encoded page address is also stored in the global showurl.
function myspace() {
    showurl = urlencode();
    // The title is free text, so it is encoded like the address; a raw "&",
    // "#" or "=" in it would otherwise cut the value short or add parameters.
    var title = encodeURIComponent(document.title);
    // The separators must be bare "&". Character references such as "&amp;"
    // are not decoded inside an HTML script element, so writing the entity
    // here would send parameters literally named "amp;t", "amp;c", "amp;l".
    window.open("http://www.myspace.com/Modules/PostTo/Pages/?u=" + showurl + "&t=" + title + "&c=" + title + "&l=3");
}
-->
</script>

<div style="width:100%; background:white; text-align:left; border:0 solid black;overflow: hidden;">
<div style="border-bottom:silver 0 solid; width:100%; clear:both; margin-bottom:5px; padding:0; font-size:0.8em; line-height:1.7em; float: left;"><b>Share this article </b><a onclick="fPopUp(500,500,'http://extras.thesun.co.uk/share_this_article/copy.htm'); return false;" href="javascript:">What is this?</a>
</div>
<div style="height:30px; float:left; margin-right:16px;"><a href="javascript:void(0);" onclick="javascript:digg();" style="font-size:0.8em; line-height:1.7em;">
<img src="http://www.thesun.co.uk/multimedia/archive/00372/Digg__372738a.gif" alt="DIGG" hspace="3" align="left" valign="middle" style="padding-right:2px;" />Digg it!</a></div>
<div style="height:30px; float:left; margin-right:16px;"><a href="javascript:void(0);" onclick="javascript:delicious();" style="font-size:0.8em; line-height:1.7em;">
<img src="http://www.thesun.co.uk/multimedia/archive/00372/del_ic_ious_372739a.gif" alt="DEL.ICIO.US" hspace="3" align="left" valign="middle" style="padding-right:2px;" />del.icio.us</a></div>
<div style="height:30px; float:left; margin-right:16px;"><a href="javascript:void(0);" onclick="javascript:myspace();" style="font-size:0.8em; line-height:1.7em;" title="MySpace">
<img src="http://x.myspace.com/images/myspace_logo_16.gif" alt="MYSPACE" title="MySpace" hspace="3" align="left" valign="middle" style="padding-right:2px;" />MySpace</a></div>
<div style="height:30px; float:left; margin-right:16px;"><a href="javascript:void(0);" onclick="javascript:facebook();" style="font-size:0.8em; line-height:1.7em;">
<img src="http://www.thesun.co.uk/multimedia/archive/00372/Facebook_372737a.gif" alt="FACEBOOK" hspace="3" align="left" valign="middle" style="padding-right:2px;" />Facebook</a></div>

<div style="height:30px; float:left; margin-right:16px;"><a href="javascript:void(0);" onclick="javascript:fark();" style="font-size:0.8em; line-height:1.7em;">
<img src="http://www.thesun.co.uk/multimedia/archive/00372/Fark_372736a.gif" alt="FARK" hspace="3" border="0" align="left" valign="middle" style="padding-right:2px;" />Fark</a></div>
<div style="height:30px; float:left; margin-right:16px;"><a href="javascript:void(0);" onclick="javascript:reddit();" style="font-size:0.8em; line-height:1.7em;">
<img src="http://www.thesun.co.uk/multimedia/archive/00372/Readit_372723a.gif" alt="REDDIT" hspace="3" align="left" valign="middle" style="padding-right:2px;" />Reddit</a></div>
<!--
<div style="height:30px; float:left; margin-right:16px;"><a href="javascript:void(0);" onclick="javascript:newsvine();" style="font-size:0.8em; line-height:1.7em;">
<img src="http://www.thesun.co.uk/multimedia/archive/00372/Newsvine_372735a.gif" alt="NEWSVINE" hspace="3" align="left" valign="middle" style="padding-right:2px;" />Newsvine</a></div>
-->
<preform>
<div style="height:30px; float:left; margin-right:14px; font-size: 12px; margin-top: 3px;">
<script>
document.write("<scr"+"ipt type=\"text/javasc"+"ript\" src=\"http://d.yimg.com/ds/badge2.js\" badgetype=\"text\">"+location.href+"</scr"+"ipt>");
</script>
</div>
</preform>
<div style="height:30px; float:left; margin-right:14px;"><a href="javascript:void(0);" onclick="javascript:nowpublic();" style="font-size:0.8em; line-height:1.7em;">
<img src="http://www.thesun.co.uk/multimedia/archive/00372/Nowpublic_372724a.gif" alt="NOWPUBLIC" hspace="3" align="left" valign="middle" style="padding-right:2px;" />NowPublic</a></div>
<div style="clear: both;"></div>

</div>
</div>
<div class="clear"></div>
</div>
<div class="padding-top-10"></div>
<div class="clear text-center small padding-left-right-5 text-999 padding-top-5 padding-bottom-10 grey-solid-line">	
<em>
<p>
&copy; 2009 News Group Newspapers Ltd. &quot;The Sun&quot;, &quot;Sun&quot;, &quot;Sun 
Online&quot; are registered trademarks or trade names of News Group 
Newspapers Limited. This service is provided on News Group Newspapers' <a target="_parent" href="/sol/homepage/hygiene/terms_conditions/article254101.ece">Standard 
Terms and Conditions</a> in accordance with our <a target="_blank" href="http://www.nidp.com/">Privacy 
Policy</a> . To inquire about a licence to reproduce material, visit our <a target="_blank" href="http://www.thesun.co.uk/sol/homepage/article2636543.ece">Syndication 
site</a> . View our online Press Pack. For other inquiries, <a target="_parent" href="/sol/homepage/hygiene/contact_us/article251760.ece">Contact 
Us</a> . To see all content on The Sun, please use the <a target="_parent" href="/sol/homepage/hygiene/site_map/">Site 
Map</a>. 

</p>
<p style="text-align: left;"><a href='http://the-acap.org/acap-enabled.php' border='0' target='new'><img src='http://img.thesun.co.uk/multimedia/archive/00607/acap_enabled_small__607912a.gif' border="0" /></a></p>
<script type="text/javascript">
var nTopSearchTimeDelay = 0;
</script>
<style>
input#mast-head-search-text {max-height: 18px;}
#masthead-search {margin-top:1px; *margin-top:0px;}
</style>
</em>
</div>
</div>
</div>
</body>
</html>
Which tags do I need to keep, and which ones should I drop?

Thanks.
Ekips is offline