| 
	|||||||
![]()  | 
            
        
    
| 
             | 
        Thread Tools | Search this Thread | 
| 
			
			 | 
		#1 | 
| 
			
			
			
			 Junior Member 
			
			![]() Posts: 2 
				Karma: 10 
				Join Date: May 2011 
				
				
				
				Device: Kindle 
				
				
				 | 
	
	
	
		
		
			
			 
				
				Recipes for "Dilema Veche" and "Observatorul Cultural" - weekly Romanian magazines
			 
			
			
			After 3 months of using them with no major problem, I'm ready to make them public: 
		
	
		
		
		
		
		
		
		
		
		
		
	
	Dilema Veche. Code: 
	class DilemaVeche(BasicNewsRecipe):
    title          = u'Dilema Veche' # apare vinerea, mai pe dupa-masa,depinde de Luiza cred (care se semneaza ca fiind creatorul fiecarui articol in feed-ul RSS)
    __author__            = 'song2' # inspirat din scriptul pentru Le Monde. Inspired from the Le Monde script
    description           = '"Sint vechi, domnule!" (I.L. Caragiale)'
    publisher             = 'Adevarul Holding'
    oldest_article        = 7
    max_articles_per_feed = 200
    encoding              = 'utf8'
    language = 'ro'
    masthead_url = 'http://www.dilemaveche.ro/sites/all/themes/dilema/theme/dilema_two/layouter/dilema_two_homepage/logo.png'
    publication_type = 'magazine'
    feeds = [    
                ('Editoriale si opinii - Situatiunea', 'http://www.dilemaveche.ro/taxonomy/term/37/0/feed'),                      
                ('Editoriale si opinii - Pe ce lume traim', 'http://www.dilemaveche.ro/taxonomy/term/38/0/feed'),
                ('Editoriale si opinii - Bordeie si obiceie', 'http://www.dilemaveche.ro/taxonomy/term/44/0/feed'),
                ('Editoriale si opinii - Talc Show', 'http://www.dilemaveche.ro/taxonomy/term/44/0/feed'),
                ('Tema saptamanii', 'http://www.dilemaveche.ro/taxonomy/term/19/0/feed'),
                ('La zi in cultura - Dilema va recomanda', 'http://www.dilemaveche.ro/taxonomy/term/58/0/feed'),
                ('La zi in cultura - Carte', 'http://www.dilemaveche.ro/taxonomy/term/14/0/feed'),
                ('La zi in cultura - Film', 'http://www.dilemaveche.ro/taxonomy/term/13/0/feed'),
                ('La zi in cultura - Muzica', 'http://www.dilemaveche.ro/taxonomy/term/1341/0/feed'),
                ('La zi in cultura - Arte performative', 'http://www.dilemaveche.ro/taxonomy/term/1342/0/feed'),
                ('La zi in cultura - Arte vizuale', 'http://www.dilemaveche.ro/taxonomy/term/1512/0/feed'),
                ('Societate - Ieri cu vedere spre azi', 'http://www.dilemaveche.ro/taxonomy/term/15/0/feed'),
                ('Societate - Din polul opus', 'http://www.dilemaveche.ro/taxonomy/term/41/0/feed'),
                ('Societate - Mass comedia', 'http://www.dilemaveche.ro/taxonomy/term/43/0/feed'),
                ('Societate - La singular si la plural', 'http://www.dilemaveche.ro/taxonomy/term/42/0/feed'),
                ('Oameni si idei - Educatie', 'http://www.dilemaveche.ro/taxonomy/term/46/0/feed'),
                ('Oameni si idei - Polemici si dezbateri', 'http://www.dilemaveche.ro/taxonomy/term/48/0/feed'),
                ('Oameni si idei - Stiinta si tehnologie', 'http://www.dilemaveche.ro/taxonomy/term/46/0/feed'),
                ('Dileme on-line', 'http://www.dilemaveche.ro/taxonomy/term/005/0/feed')
                 ]
    remove_tags_before = dict(name='div',attrs={'class':'spacer_10'})
    remove_tags = [
        dict(name='div', attrs={'class':'art_related_left'}),
        dict(name='div', attrs={'class':'controale'}),
		dict(name='div', attrs={'class':'simple_overlay'}),
    ]
    remove_tags_after = [dict(id='facebookLike')]
    remove_javascript = True
    no_stylesheets        = True
    remove_empty_feeds = True
    extra_css             = """
        body{font-family: Georgia,Times,serif }
        img{margin-bottom: 0.4em; display:block}
                            """
    def get_cover_url(self):
        cover_url = None
        soup = self.index_to_soup('http://dilemaveche.ro')
        link_item = soup.find('div',attrs={'class':'box_dr_pdf_picture'})
        if link_item and link_item.a:
           cover_url = link_item.a['href']
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(cover_url) 
        except: #daca nu gaseste pdf-ul
            self.log("\nPDF indisponibil")
            link_item = soup.find('div',attrs={'class':'box_dr_pdf_picture'})
            if link_item and link_item.img:
                cover_url = link_item.img['src']
            br = BasicNewsRecipe.get_browser()
            try:
                 br.open(cover_url)
            except: #daca nu gaseste nici imaginea mica mica
                print('Mama lor de nenorociti! nu este nici pdf nici imagine')
                cover_url ='http://www.dilemaveche.ro/sites/all/themes/dilema/theme/dilema_two/layouter/dilema_two_homepage/logo.png'
        return cover_url
    cover_margins = (10, 15, '#ffffff')
Code: 
	import re
from calibre.web.feeds.news import BasicNewsRecipe
class ObservatorulCultural(BasicNewsRecipe):
    """Calibre recipe for the weekly Romanian magazine "Observator cultural".

    The article list is built by hand in ``parse_index`` from the newest
    issue linked on the archive page; the cover is taken from the image
    attached to that issue's editorial (``get_cover_url``).
    """

    title = u'Observatorul cultural'
    __author__ = 'song2'  # adapted from a script by http://www.thenowhereman.com
    encoding = 'utf-8'
    language = 'ro'
    publication_type = 'magazine'
    description = 'Spiritul critic in acţiune\n'
    no_stylesheets = True
    remove_javascript = True
    masthead_url = 'http://www.observatorcultural.ro/userfiles/article/sigla%20Observator%20cultural_02231058.JPG'
    keep_only_tags = [
        dict(name='div', attrs={'class': 'detaliuArticol'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': 'comentariiArticol'}),
        dict(name='div', attrs={'class': 'postComment'}),
        dict(name='div', attrs={'class': 'utileArticol'}),
        dict(name='p', attrs={'class': 'butonComenteaza'}),
        dict(name='h5'),
        dict(name='div', attrs={'style': 'margin-top: 0px; padding-top: 0px;'}),
    ]

    # URL of the editorial of the issue being fetched. Written by
    # parse_index() and read by get_cover_url(); stays None when the issue
    # has no "Editorial" section.
    coverpage = None

    def parse_index(self):
        """Build the list of sections and articles of the newest issue.

        Returns a list of ``(section title, [article dict, ...])`` tuples,
        one per ``<dl class="continutArhive">`` found on the issue page.
        Each article dict has the keys ``title``, ``url``, ``date`` and
        ``author``.

        Raises:
            ValueError: if the archive page has no link to an issue.
        """
        soup = self.index_to_soup('http://www.observatorcultural.ro/Arhiva*-archive.html')
        issue_tag = soup.find('a', href=re.compile(r'observatorcultural\.ro/Numarul'))
        if issue_tag is None:
            raise ValueError('No link to the latest issue was found on the archive page')
        issue_url = issue_tag['href']
        print(issue_url)
        issue_soup = self.index_to_soup(issue_url)

        feeds = []
        for section in issue_soup.findAll('dl', attrs={'class': 'continutArhive'}):
            section_title = self.tag_to_string(section.find('dt'))
            stories = []
            for entry in section.findAll('dd'):
                links = entry.findAll('a')
                if not links:
                    # Nothing to link to in this entry.
                    continue
                if len(links) == 1:
                    # The article has no author: the only link is the article.
                    author = ''
                    article = links[0]
                else:
                    # First link is the author, second one the article.
                    author = self.tag_to_string(links[0])
                    article = links[1]
                    print(author)
                stories.append({
                    'title': self.tag_to_string(article),
                    'url': article['href'],
                    'date': '',
                    'author': author,
                })
                if 'Editorial' in section_title:
                    # Remember the editorial; its image is used as the cover.
                    self.coverpage = article['href']
            feeds.append((section_title, stories))
        print(feeds)
        return feeds

    def get_cover_url(self):
        """Return the cover image URL taken from the editorial, or None.

        None is returned when no editorial was recorded by ``parse_index``
        or when the editorial page has no ``<a rel="lightbox">`` image.
        """
        if not self.coverpage:
            return None
        soup = self.index_to_soup(self.coverpage)
        # Look for the image attached to the article text.
        link_item = soup.find('a', attrs={'rel': 'lightbox'})
        if link_item is None or link_item.img is None:
            return None
        # Drop the '_details_' marker from the image URL.
        return link_item.img['src'].replace('_details_', '')
 | 
| 
		 | 
	
	
	
		
		
		
		
			 
		
		
		
		
		
		
		
			
		
		
		
	 | 
| 
			
			 | 
		#2 | 
| 
			
			
			
			 creator of calibre 
			
			![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() Posts: 45,609 
				Karma: 28549044 
				Join Date: Oct 2006 
				Location: Mumbai, India 
				
				
				Device: Various 
				
				
				 | 
	
	
	
		
		
		
		
		 
			
			There is already a builtin recipe for Dilema Veche; is yours different?
		 
		
	
		
		
		
		
		
		
		
		
		
		
	
	 | 
| 
		 | 
	
	
	
		
		
		
		
			 
		
		
		
		
		
		
		
			
		
		
		
	 | 
| Advert | |
| 
         | 
    
| 
			
			 | 
		#3 | 
| 
			
			
			
			 Junior Member 
			
			![]() Posts: 2 
				Karma: 10 
				Join Date: May 2011 
				
				
				
				Device: Kindle 
				
				
				 | 
	
	
	
		
		
		
		
		 
			
			The old version doesn't do much, it doesn't even include the articles' titles. 
		
	
		
		
		
		
		
		
		
		
		
		
	
	The new version categorizes articles, gets the real cover for every week's issue, includes the title and the author's name in every article and uses masthead_url. It's just nicer  
		 | 
| 
		 | 
	
	
	
		
		
		
		
			 
		
		
		
		
		
		
		
			
		
		
		
	 | 
| 
			
			 | 
		#4 | 
| 
			
			
			
			 creator of calibre 
			
			![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() Posts: 45,609 
				Karma: 28549044 
				Join Date: Oct 2006 
				Location: Mumbai, India 
				
				
				Device: Various 
				
				
				 | 
	
	
	
		
		
		
		
		 
			
			OK      .
		 
		
	
		
		
		
		
		
		
		
		
		
		
	
	 | 
| 
		 | 
	
	
	
		
		
		
		
			 
		
		
		
		
		
		
		
			
		
		
		
	 | 
| 
			
			 | 
		#5 | 
| 
			
			
			
			 Junior Member 
			
			![]() Posts: 8 
				Karma: 10 
				Join Date: Jun 2011 
				Location: RO 
				
				
				Device: Kobo Touch 
				
				
				 | 
	
	
	
		
		
			
			 
			
			Super treabă. Am scos și eu un ePub cu Dilema dar am ceva probleme: 
		
	
		
		
		
		
		
		
		
		
		
		
	
	„Bordeie și obiceie” și „Tîlc show” au aceleași articole. Plus problema diacriticelor. Am modificat css-ul, am inclus un font și tot mai am probleme cu unele titluri. În rest se vede bine. Varianta online e o replică variantei tipărite? Cum apar articolele pe web? Mulțumesc.  | 
| 
		 | 
	
	
	
		
		
		
		
			 
		
		
		
		
		
		
		
			
		
		
		
	 | 
| Advert | |
| 
         | 
    
| 
			
			 | 
		#6 | 
| 
			
			
			
			 Junior Member 
			
			![]() Posts: 4 
				Karma: 10 
				Join Date: Jul 2011 
				
				
				
				Device: nook 
				
				
				 | 
	
	
	
		
		
		
		
		 
			
			Multzumesc frumos pentru efortul depus song2, m-ai scutit de ceva munca gen copy/paste  
		
	
		
		
		
		
		
		
		
		
		
		
	
	![]() Am o singura problema cu scriptul asta, se pare ca nu extrage toate articolele de pe site, de exemplu la ultimul numar de saptamina asta, ala cu criza educatiei umaniste, articolele lui cosasu, catalin stefanescu si inca citeva nu apar. Ai cumva vreo sugestie/imbunatatire la script (sorry, nu ma pricep la 'scriptare'). Mersi mult. Numai bine.  | 
| 
		 | 
	
	
	
		
		
		
		
			 
		
		
		
		
		
		
		
			
		
		
		
	 | 
| 
			
			 | 
		#7 | 
| 
			
			
			
			 Junior Member 
			
			![]() Posts: 4 
				Karma: 10 
				Join Date: Jul 2011 
				
				
				
				Device: nook 
				
				
				 | 
	
	
	
		
		
		
		
		 
			
			Am tot incercat si am modificat recipe-ul de mai sus in felul urmator (sint articole care nu fac parte din subcategorie si care sint acum extrase): 
		
	
		
		
		
		
		
		
		
		
		
		
	
	class DilemaVeche(BasicNewsRecipe): title = u'Dilema Veche' # apare vinerea, mai pe dupa-masa,depinde de Luiza cred (care se semneaza ca fiind creatorul fiecarui articol in feed-ul RSS) __author__ = 'song2' # inspirat din scriptul pentru Le Monde. Inspired from the Le Monde script description = '"Sint vechi, domnule!" (I.L. Caragiale)' publisher = 'Adevarul Holding' oldest_article = 7 max_articles_per_feed = 200 encoding = 'utf8' language = 'ro' masthead_url = 'http://www.dilemaveche.ro/sites/all/themes/dilema/theme/dilema_two/layouter/dilema_two_homepage/logo.png' publication_type = 'magazine' feeds = [ ('Editoriale si opinii - Situatiunea', 'http://www.dilemaveche.ro/taxonomy/term/37/0/feed'), ('Editoriale si opinii - Pe ce lume traim', 'http://www.dilemaveche.ro/taxonomy/term/38/0/feed'), ('Editoriale si opinii - Bordeie si obiceie', 'http://www.dilemaveche.ro/taxonomy/term/44/0/feed'), ('Editoriale si opinii - Talc Show', 'http://www.dilemaveche.ro/taxonomy/term/39/0/feed'), ('Editoriale si opinii - Editoriale si opinii', 'http://www.dilemaveche.ro/taxonomy/term/1/0/feed'), ('Tema saptamanii', 'http://www.dilemaveche.ro/taxonomy/term/19/0/feed'), ('La zi in cultura - Dilema va recomanda', 'http://www.dilemaveche.ro/taxonomy/term/58/0/feed'), ('La zi in cultura - Carte', 'http://www.dilemaveche.ro/taxonomy/term/14/0/feed'), ('La zi in cultura - Film', 'http://www.dilemaveche.ro/taxonomy/term/13/0/feed'), ('La zi in cultura - Muzica', 'http://www.dilemaveche.ro/taxonomy/term/1341/0/feed'), ('La zi in cultura - Arte performative', 'http://www.dilemaveche.ro/taxonomy/term/1342/0/feed'), ('La zi in cultura - Arte vizuale', 'http://www.dilemaveche.ro/taxonomy/term/1512/0/feed'), ('La zi in cultura - La zi in cultura', 'http://www.dilemaveche.ro/taxonomy/term/2/0/feed'), ('Societate - Ieri cu vedere spre azi', 'http://www.dilemaveche.ro/taxonomy/term/15/0/feed'), ('Societate - Din polul opus', 'http://www.dilemaveche.ro/taxonomy/term/41/0/feed'), ('Societate - Mass 
comedia', 'http://www.dilemaveche.ro/taxonomy/term/43/0/feed'), ('Societate - La singular si la plural', 'http://www.dilemaveche.ro/taxonomy/term/42/0/feed'), ('Societate - Societate', 'http://www.dilemaveche.ro/taxonomy/term/3/0/feed'), ('Oameni si idei - Educatie', 'http://www.dilemaveche.ro/taxonomy/term/46/0/feed'), ('Oameni si idei - Polemici si dezbateri', 'http://www.dilemaveche.ro/taxonomy/term/48/0/feed'), ('Oameni si idei - Stiinta si tehnologie', 'http://www.dilemaveche.ro/taxonomy/term/46/0/feed'), ('Dileme on-line', 'http://www.dilemaveche.ro/taxonomy/term/005/0/feed') ] remove_tags_before = dict(name='div',attrs={'class':'spacer_10'}) remove_tags = [ dict(name='div', attrs={'class':'art_related_left'}), dict(name='div', attrs={'class':'controale'}), dict(name='div', attrs={'class':'simple_overlay'}), ] remove_tags_after = [dict(id='facebookLike')] remove_javascript = True no_stylesheets = True remove_empty_feeds = True extra_css = """ body{font-family: Georgia,Times,serif } img{margin-bottom: 0.4em; display:block} """ def get_cover_url(self): cover_url = None soup = self.index_to_soup('http://dilemaveche.ro') link_item = soup.find('div',attrs={'class':'box_dr_pdf_picture '}) if link_item and link_item.a: cover_url = link_item.a['href'] br = BasicNewsRecipe.get_browser() try: br.open(cover_url) except: #daca nu gaseste pdf-ul self.log("\nPDF indisponibil") link_item = soup.find('div',attrs={'class':'box_dr_pdf_picture '}) if link_item and link_item.img: cover_url = link_item.img['src'] br = BasicNewsRecipe.get_browser() try: br.open(cover_url) except: #daca nu gaseste nici imaginea mica mica print('Mama lor de nenorociti! nu este nici pdf nici imagine') cover_url ='http://www.dilemaveche.ro/sites/all/themes/dilema/theme/dilema_two/layouter/dilema_two_homepage/logo.png' return cover_url cover_margins = (10, 15, '#ffffff') Note: aveti vreo idee cum pot fi extrase numere de dilema veche trecute?  | 
| 
		 | 
	
	
	
		
		
		
		
			 
		
		
		
		
		
		
		
			
		
		
		
	 | 
| 
			
			 | 
		#8 | 
| 
			
			
			
			 Junior Member 
			
			![]() Posts: 1 
				Karma: 10 
				Join Date: Feb 2012 
				
				
				
				Device: Kindle 
				
				
				 | 
	
	
	
		
		
		
		
		 
			
			Hello, 
		
	
		
		
		
		
		
		
		
		
		
		
	
	Is there anyone who can provide an updated recipe for the subscription-based 'Dilema Veche'? Thank you  | 
| 
		 | 
	
	
	
		
		
		
		
			 
		
		
		
		
		
		
		
			
		
		
		
	 | 
| 
			
			 | 
		#9 | 
| 
			
			
			
			 Junior Member 
			
			![]() Posts: 4 
				Karma: 10 
				Join Date: Jul 2011 
				
				
				
				Device: nook 
				
				
				 | 
	
	
	
		
		
		
		
		 
			
			I have been trying for a couple of days now without success... Maybe a more savvy contributor can help us...
		 
		
	
		
		
		
		
		
		
		
		
		
		
	
	 | 
| 
		 | 
	
	
	
		
		
		
		
			 
		
		
		
		
		
		
		
			
		
		
		
	 | 
![]()  | 
            
        
    
| Tags | 
| dilema veche, epub, observatorul cultural, recipe | 
            
  | 
    
			 
			Similar Threads
		 | 
	||||
| Thread | Thread Starter | Forum | Replies | Last Post | 
| "Settings," then "311" - Int'l Kindle searches for wireless providers in the area | Dr. Drib | Amazon Kindle | 2 | 08-28-2011 11:27 AM | 
| Woher bekomme ich "Infinite Jest" oder "Unendlicher Spaß" von David Foster Wallace? | bitschnau | Erste Hilfe | 3 | 11-01-2010 02:22 PM | 
| "Zeit-Odyssee"-Trilogie droht das "dunkle Turm"-Schicksal | ThR | E-Books | 4 | 02-10-2010 06:18 AM |