from datetime import datetime, date, timedelta

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.templates import Template, CLASS
from lxml.html.builder import HTML, HEAD, TITLE, STYLE, DIV, BODY, BR, A, HR, UL
from BeautifulSoup import BeautifulSoup
import re


class PTVNavBarTemplate(Template):
    """
    Same as calibre.web.feeds.templates.NavBarTemplate but without the
    'This article was downloaded by calibre from...' text at the bottom.
    """

    def _generate(self, bottom, feed, art, number_of_articles_in_feed,
                  two_levels, url, __appname__, prefix='', center=True,
                  extra_css=None, style=None):
        head = HEAD(TITLE('navbar'))
        if prefix and not prefix.endswith('/'):
            prefix += '/'
        align = 'center' if center else 'left'
        navbar = DIV(CLASS('calibre_navbar', 'calibre_rescale_70',
                           style='text-align:' + align))
        if bottom:
            navbar.append(HR())
        self.root = navbar


class ProgramTV(BasicNewsRecipe):
    title = u'program TV'
    oldest_article = 100
    max_articles_per_feed = 150
    __author__ = 'lukaszd'
    description = 'program TV na cały tydzień'
    language = 'pl'
    encoding = 'iso-8859-2'
    no_stylesheets = True
    remove_javascript = True
    timefmt = ''
    recursions = 1

    # Channels to download: each entry is [station id (stid) in the tv.wp.pl URL, display name].
    # Uncomment an entry to include that channel.
    programy = [
        [ 1, "TVP 1" ],
        [ 2, "TVP 2" ],
        [ 368, "TVP Kultura" ],
        [ 17, "TVN" ],
        [ 238, "TVN 7" ],
        [ 5, "Polsat" ],
        [ 14, "HBO" ],
        [ 201, "HBO 2" ],
        [ 67, "Discovery" ],
        [ 360, "National Geographic" ],
        [ 355, "Animal Planet" ],
        # [ 473, "13th Street Universal" ],
        # [ 472, "4funTV" ],
        # [ 85, "Ale Kino" ],
        # [ 481, "Animal Planet HD" ],
        # [ 174, "AXN" ],
        # [ 418, "AXN Crime" ],
        # [ 493, "AXN HD" ],
        # [ 416, "AXN Sci fi" ],
        # [ 449, "BBC CBeebies" ],
        # [ 420, "BBC Entertainment" ],
        # [ 448, "BBC Knowledge" ],
        # [ 415, "BBC Lifestyle" ],
        # [ 496, "Boomerang" ],
        # [ 13, "CANAL" ],
        # [ 179, "CANAL Film" ],
        # [ 484, "CANAL Gol" ],
        # [ 183, "CANAL Sport" ],
        # [ 361, "Cartoon Network" ],
        # [ 437, "Cinemax" ],
        # [ 442, "Cinemax 2" ],
        # [ 71, "Club TV" ],
        # [ 400, "Comedy Central" ],
        # [ 213, "Comedy Central Family" ],
        # [ 501, "DaVinci Learning" ],
        # [ 483, "Discovery HD Showcase" ],
        # [ 487, "Discovery Historia" ],
        # [ 356, "Discovery Science" ],
        # [ 186, "Discovery World" ],
        # [ 455, "Disney Channel" ],
        # [ 495, "Disney Junior" ],
        # [ 74, "Disney XD" ],
        # [ 452, "Domo TV" ],
        # [ 440, "Edusat" ],
        # [ 482, "Eska TV" ],
        # [ 42, "Eurosport" ],
        # [ 364, "Eurosport 2" ],
        # [ 492, "Eurosport 2 HD" ],
        # [ 497, "Eurosport HD" ],
        # [ 79, "Extreme Sports" ],
        # [ 90, "Fashion TV" ],
        # [ 436, "FilmBox" ],
        # [ 433, "FilmBox Extra" ],
        # [ 499, "FilmBox Family" ],
        # [ 469, "FilmBox HD" ],
        # [ 427, "Fox Life" ],
        # [ 421, "HBO Comedy" ],
        # [ 200, "Hyper" ],
        # [ 500, "Investigation Discovery" ],
        # [ 392, "ITV" ],
        # [ 208, "Kab Toya Lodz" ],
        # [ 209, "Kab WTK Poznan" ],
        # [ 205, "Kino Polska" ],
        # [ 438, "KuchniaTV" ],
        # [ 378, "MCM POP" ],
        # [ 485, "MGM HD" ],
        # [ 417, "Mini Mini" ],
        # [ 439, "Movies 24" ],
        # [ 216, "MTV" ],
        # [ 428, "National Geographic HD" ],
        # [ 426, "National Geographic Wild" ],
        # [ 425, "Nickelodeon Polska" ],
        # [ 488, "nSPORT" ],
        # [ 451, "Orange Sport" ],
        # [ 15, "Planete" ],
        # [ 8, "Polonia 1" ],
        # [ 6, "Polsat 2" ],
        # [ 435, "Polsat Cafe" ],
        # [ 490, "Polsat Film" ],
        # [ 491, "Polsat Futbol" ],
        # [ 480, "Polsat News" ],
        # [ 494, "Polsat Play" ],
        # [ 224, "Polsat Sport" ],
        # [ 408, "Polsat Sport Extra" ],
        # [ 489, "Polsat Sport HD" ],
        # [ 430, "PR 1" ],
        # [ 432, "PR 2" ],
        # [ 429, "PR 3" ],
        # [ 431, "Radio ZET" ],
        # [ 477, "Rebel TV" ],
        # [ 434, "Religia TV" ],
        # [ 444, "RMF FM" ],
        # [ 468, "Scifi Universal" ],
        # [ 407, "Sport Klub" ],
        # [ 405, "Sport Klub " ],
        # [ 402, "Superstacja" ],
        # [ 403, "TCM" ],
        # [ 16, "Tele 5" ],
        # [ 474, "TLC" ],
        # [ 374, "Trace TV" ],
        # [ 261, "Travel Channel" ],
        # [ 486, "TV 1000" ],
        # [ 18, "TV 4" ],
        # [ 399, "TV Biznes" ],
        # [ 235, "TV Puls" ],
        # [ 413, "TV Silesia" ],
        # [ 151, "TVN 24" ],
        # [ 419, "TVN CNBC Biznes" ],
        # [ 475, "TVN Meteo" ],
        # [ 375, "TVN Style" ],
        # [ 265, "TVN Turbo" ],
        # [ 19, "TVP Bialystok" ],
        # [ 20, "TVP Bydgoszcz" ],
        # [ 21, "TVP Gdansk" ],
        # [ 359, "TVP Gorzow Wielkopolski" ],
        # [ 456, "TVP HD" ],
        # [ 471, "TVP Historia" ],
        # [ 233, "TVP Info" ],
        # [ 22, "TVP Katowice" ],
        # [ 23, "TVP Krakow" ],
        # [ 24, "TVP Lublin" ],
        # [ 25, "TVP Lodz" ],
        # [ 365, "TVP Olsztyn" ],
        # [ 3, "TVP Polonia" ],
        # [ 26, "TVP Poznan" ],
        # [ 27, "TVP Rzeszow" ],
        # [ 479, "TVP Seriale" ],
        # [ 406, "TVP Sport" ],
        # [ 28, "TVP Szczecin" ],
        # [ 262, "TVP Warszawa" ],
        # [ 30, "TVP Wroclaw" ],
        # [ 441, "Universal Channel" ],
        # [ 366, "Viasat Explorer" ],
        # [ 367, "Viasat History" ],
        # [ 92, "VIVA Polska" ],
        # [ 217, "ZigZap" ],
        # [ 267, "Zone Europa" ],
        # [ 84, "Zone Reality" ],
        # [ 78, "Zone Romantica" ],
    ]

    # Polish weekday names, indexed by date.weekday() (Monday == 0).
    dnityg = [u'poniedziałek', u'wtorek', u'środa', u'czwartek',
              u'piątek', u'sobota', u'niedziela']

    def __init__(self, options, log, progress_reporter):
        """ Constructor. """
        BasicNewsRecipe.__init__(self, options, log, progress_reporter)
        # Replace the default navigation bar template with the one that skips
        # the 'This article was downloaded by calibre from...' footer.
        self.navbar = PTVNavBarTemplate()

    def preprocess_raw_html(self, raw_html, url):
        # Append the page URL wrapped in a <urlsource> tag so that
        # preprocess_html() can read it back from the parsed soup.
        raw_html += '<urlsource>'
        raw_html += url
        raw_html += '</urlsource>'
        return raw_html

    def preprocess_html(self, soup):
        url = soup.find('urlsource').string
        if re.search(r'program\.html', url):
            # Channel listing page: recover the channel name and date from the
            # URL. BeautifulSoup reports the bare '&name' in the URL as the
            # entity '&name;', hence the ';' in the pattern.
            progname = re.search(r'name;=([^&]+)', url).group(1)
            progname = re.sub('-', ' ', progname)
            progdate = re.search(r'date=(\d{4}-\d\d-\d\d)', url).group(1)
            progdztg = date(int(progdate[:4]), int(progdate[5:7]), int(progdate[8:10]))
            progdztg = self.dnityg[progdztg.weekday()]
            divs = soup.findAll('div', attrs={'class': 'programIn clra'})
            # Page header: channel name, then the date with its weekday
            # (the markup here is approximate).
            proghtml = ''.join([
                '<h1>', progname, '</h1>',
                '<h2>', progdate, ' (', progdztg, ')', '</h2>',
                '<div>'
            ])
            # One line per programme: the start time, then the title linked to
            # its description page (the markup here is approximate).
            for d in divs:
                godzina = d.find('strong').string
                tytullink = d.find('h4').find('a')
                if tytullink.has_key('href'):
                    linka = tytullink['href']
                else:
                    linka = ''
                tytul = tytullink.string
                proghtml = ''.join([
                    proghtml,
                    '<p><strong>', godzina, '</strong>', ' ',
                    '<a href="' + linka + '">', tytul, '</a>',
                    '</p>'
                ])
            proghtml += '</div>'
            return BeautifulSoup('<html><body>' + proghtml + '</body></html>')
        else:
            # Programme description page: keep the article body and drop the
            # 'print' link.
            opis = soup.find('div', attrs={'class': 'content art'})
            druk = opis.find('a', attrs={'class': 'print'})
            druk.extract()
            return opis

    def listaprog(self, dzien):
        # Build one article entry per configured channel for the given day
        # (dzien is a 'YYYY-MM-DD' string).
        articles = []
        gdzie = 'http://tv.wp.pl/program.html?date=' + dzien
        for p in self.programy:
            pr = re.sub(' ', '-', p[1])
            url = gdzie + '&name=' + pr + '&stid=' + str(p[0])
            articles.append({'title': p[1], 'url': url, 'date': dzien})
        return articles

    def parse_index(self):
        # Build one feed per day for the next seven days, starting today.
        feeds = []
        dzien = date.today()
        for n in range(0, 7):
            dziens = dzien.strftime('%Y-%m-%d')
            tytul = dzien.strftime("%d.%m") + ", " + self.dnityg[dzien.weekday()]
            feeds.append((tytul, self.listaprog(dziens)))
            dzien = dzien + timedelta(1)
        return feeds
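
# For reference, a small commented-out sketch of the listing URL that
# listaprog() builds for a single channel and day; the date and channel
# values below are samples only, not part of the recipe.
#
#   import re
#   sample_day = '2012-01-02'
#   stid, name = 1, "TVP 1"
#   url = ('http://tv.wp.pl/program.html?date=' + sample_day +
#          '&name=' + re.sub(' ', '-', name) + '&stid=' + str(stid))
#   # url == 'http://tv.wp.pl/program.html?date=2012-01-02&name=TVP-1&stid=1'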