import re
from libprs500.ebooks.lrf.web.profiles import DefaultProfile

class DemGaz(DefaultProfile):
    """Web profile for the Democrat Gazette (NWAnews) feed.

    Overrides a handful of ``DefaultProfile`` settings and hooks. How the
    base class consumes them is not visible in this file.
    """

    title = 'Democrat Gazette'
    max_recursions = 2
    max_articles_per_feed = 100
    use_pubdate = False

    # (compiled pattern, replacement callable) pairs. Every pattern is
    # compiled with IGNORECASE and DOTALL, so tag case does not matter and
    # ``.*?`` may span several lines.
    # NOTE(review): presumably applied in order to each downloaded page by
    # DefaultProfile -- confirm against the base class.
    preprocess_regexps = [
        (re.compile(pattern, re.IGNORECASE | re.DOTALL), replacement)
        for pattern, replacement in
        [
            # Replace the whole document head with an empty one.
            (r'<HEAD>.*?</HEAD>', lambda match: '<HEAD></HEAD>'),
            # Drop everything between the opening body tag and the
            # "start Entries" marker.
            (r'<BODY.*?>.*?<!-- start Entries -->',
             lambda match: '<BODY><!-- start Entries -->'),
            # Drop everything between the "end Entries" marker and the
            # closing body tag.
            (r'<!-- end Entries -->.*?</BODY>', lambda match: '</BODY>'),
            # Remove script elements together with their content.
            (r'<script.*?>.*?</script>', lambda match: ''),
            # Remove the article wrapper markup up to and including the
            # opening tag of the article body div.
            (r'<div class="apple-rss-article apple-rss-read" onclick=.*?<div class="apple-rss-article-body">', lambda match: ''),
            # Remove the two site logo images.
            (r'<img src=\'/images/logo_NWAnews.gif\' alt=\'NWAnews.com :: Northwest Arkansas\' News Source\'.*?>', lambda match: ''),
            (r'<img src=\'/images/logo_adg.gif\'.*?>', lambda match: ''),
        ]
    ]

    def get_feeds(self):
        """Return the feeds to fetch as a list of ``(name, url)`` tuples."""
        return [('NWANews', 'http://feeds.feedburner.com/nwanewsall')]

    def print_version(self, url):
        """Return the printer-friendly URL for the article at ``url``.

        The print view is addressed by appending ``print/`` to the article
        URL. A separating slash is inserted when ``url`` does not already
        end with one, so the last path segment is never fused with
        ``print/``. URLs that already end in ``/`` give the same result as
        plain concatenation.
        """
        if not url.endswith('/'):
            url += '/'
        return url + 'print/'
         
