import re
from libprs500.ebooks.lrf.web.profiles import DefaultProfile

class ContraCostaTimes(DefaultProfile):
    """Profile for the Contra Costa Times "Most Viewed" RSS feed.

    Declares the feed to fetch plus an ordered list of regular-expression
    clean-up rules in ``preprocess_regexps``.
    NOTE(review): the rules are presumably applied by ``DefaultProfile`` to
    the fetched HTML, in list order -- confirm against the base class.
    """

    title = 'Contra Costa Times'
    max_recursions = 2
    max_articles_per_feed = 20

    # Each entry is (compiled pattern, callable taking the match object and
    # returning the replacement text). All patterns are compiled with
    # IGNORECASE | DOTALL, so '.' also spans newlines. The order is
    # significant: the targeted rules must come before the catch-all
    # <div ...> / </div> strippers, which would otherwise remove the
    # markers those rules anchor on.
    preprocess_regexps = [
        (re.compile(regex, re.IGNORECASE | re.DOTALL), substitute)
        for regex, substitute in [
            # Rules anchored on the "apple-rss-*" markup.
            (r'<HEAD>.*?</HEAD>', lambda match: '<HEAD></HEAD>'),
            (r'<BODY.*?>.*? <div class="apple-rss-content',
             lambda match: '<BODY> <div class="apple-rss-content'),
            (r'<div class="apple-rss-article.*?>.*?<div class="apple-rss-article-footer"></div>',
             lambda match: ''),
            (r'<div id="apple-rss-pagination">.*?</BODY>',
             lambda match: '</BODY>'),
            (r'<script>.*?</script>', lambda match: ''),
            # Rules anchored on the article markup: drop everything before
            # the title, rulers, ad boxes and the footer.
            (r'<body class="bodyStyle">.*?<H1 class="articleTitle">',
             lambda match: '<body><H1 class="articleTitle">'),
            (r'<hr.*?>', lambda match: ''),
            (r'<div style="width:336px" class="articleEmbeddedAdBox"></div>',
             lambda match: ''),
            (r'<div align="center" class="adElement">.*?</div>',
             lambda match: ''),
            # The match consumes the closing </body>, so it has to be put
            # back; the replacement used to be the truncated fragment
            # '<body', which left an unterminated tag in the output.
            (r'<div class="articlePositionFooter">.*?</body>',
             lambda match: '</body>'),
            # Catch-all removal of any remaining div tags (content is kept).
            (r'<div.*?>', lambda match: ''),
            (r'</div>', lambda match: ''),
            (r'Advertisement', lambda match: ''),
            (r'<p><br style="clear:both;"/>.*?</body>',
             lambda match: '</body>'),
        ]
    ]

    def get_feeds(self):
        """Return the feeds of this profile as a list of (title, url) tuples."""
        return [
            ('Most Viewed', 'http://extras.mnginteractive.com/live/xsl/memv/xml/571_most_viewed_rss.xml'),
        ]

    ##def print_version(self, url):
	##return url	