Old 12-20-2025, 09:49 PM   #2
readabit
Enthusiast
readabit could sell banana peel slippers to a Deveel.
 
Posts: 44
Karma: 3034
Join Date: Mar 2012
Device: Boox Note Air 2 Plus, Samsung Galaxy S23 (base), Samsung Galaxy Tab S3

I've cracked it!

Still have some more bugs to work out (some dates are not returning anything for some reason), but I am actually getting content now!
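
If anyone wants to poke at the missing-dates problem with me, here is a quick standalone test script (run outside calibre; the trimmed-down query string and variable names are just my own, but the endpoint is the same one the recipe uses). It fetches the next few days and prints how many readings each service actually returns:

Code:
import json
from datetime import datetime, timedelta
from urllib.request import urlopen

# Trimmed-down query string -- assumption: 'format=json' plus the lectionary
# choice is enough for a quick check; the recipe's full settings string works too.
SETTINGS = '?format=json&lectionary=daily-office-readings'

for i in range(6):
	date_str = (datetime.now() + timedelta(days=i)).strftime('%Y-%m-%d')
	url = 'https://api.dailyoffice2019.com/api/v1/readings/{}'.format(date_str) + SETTINGS
	with urlopen(url) as resp:
		data = json.loads(resp.read())
	services = data.get('services', {})
	for name in ('Morning Prayer', 'Evening Prayer'):
		readings = services.get(name, {}).get('readings', [])
		print(date_str, name, '->', len(readings), 'readings')

Anyway, here is the full recipe as it stands: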

Code:
import json
import string, re # string constants and regular-expression helpers (not used yet, kept for later text cleanup)
from datetime import datetime, timedelta
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from urllib.parse import urlparse, urlsplit
from contextlib import closing
#from calibre.web.feeds import Feed, feed_from_xml, feeds_from_index, templates
from calibre.web.feeds import Article, Feed


class DailyOffice(BasicNewsRecipe):
	title       = 'The Daily Office Readings'
	__author__  = 'Anglican Church in North America'
	description = 'ACNA Book of Common Prayer Daily Readings'
	#timefmt = ' [%a, %d %b, %Y]'
	
	remove_tags = [dict(attrs={'class':['el-switch', 'asterisk']}),
		dict(name=['script', 'noscript', 'style'])]
	# no_stylesheets = True
	#extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
	
	# auto_cleanup   = True
	# auto_cleanup_keep = '//*[@class="readingsPanel"]' # This is the key line to keep only content inside a specific class
	
	days_number = 6
	# max_articles_per_feed = days_number * 2
	max_articles_per_feed = days_number
	# https://api.dailyoffice2019.com/api/v1/readings/2025-12-21?absolution=lay&bible_translation=nasb&canticle_rotation=default&chrysostom=on&collects=rotating&confession=long-on-fast&ep_great_litany=ep_litany_off&family-creed=family-creed-no&family-opening-sentence=family-opening-sentence-fixed&family_collect=time_of_day&family_reading_audio=off&family_readings=brief&format=json&general_thanksgiving=on&grace=rotating&language_style=contemporary&language_style_for_our_father=traditional&lectionary=daily-office-readings&morning_prayer_invitatory=invitatory_traditional&mp_great_litany=mp_litany_off&national_holidays=us&o_antiphons=literal&psalm_style=whole_verse&psalm_translation=contemporary&psalms=contemporary&psalter=60&reading_audio=off&reading_cycle=1&reading_headings=on&reading_length=full&style=unison&suffrages=rotating&translation=nasb
	daily_office_settings = '?absolution=lay&bible_translation=nasb&canticle_rotation=default&chrysostom=on&collects=rotating&confession=long-on-fast&ep_great_litany=ep_litany_off&family-creed=family-creed-no&family-opening-sentence=family-opening-sentence-fixed&family_collect=time_of_day&family_reading_audio=off&family_readings=brief&format=json&general_thanksgiving=on&grace=rotating&language_style=contemporary&language_style_for_our_father=traditional&lectionary=daily-office-readings&morning_prayer_invitatory=invitatory_traditional&mp_great_litany=mp_litany_off&national_holidays=us&o_antiphons=literal&psalm_style=whole_verse&psalm_translation=contemporary&psalms=contemporary&psalter=60&reading_audio=off&reading_cycle=1&reading_headings=on&reading_length=full&style=unison&suffrages=rotating&translation=nasb'
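	# Note: 'format=json' in the query string above is what makes the API return
	# JSON rather than a rendered page; preprocess_raw_html() below parses that
	# JSON and rebuilds it as plain HTML for calibre.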
	
	print('BEGIN!!!')
	
	
	my_articles = []
	today = datetime.now()
	print('CREATE ARTICLE LIST!!!')
	
	# Generate URLs for the configured number of days
	for i in range(days_number):
		print('ARTICLE LIST #')
		print(i)
		current_date = today + timedelta(days=i)
		date_str = current_date.strftime('%Y-%m-%d') # Format the date into the URL format required by the website
	# Full Day.
		url = 'https://api.dailyoffice2019.com/api/v1/readings/{}'.format(date_str) + daily_office_settings
		article_title = current_date.strftime('Daily Prayer Readings for %B %d, %Y') # Create a unique title for each feed item
		# article_title += ' (' + url + ')' # For Debugging.
		#my_articles.append((article_title, url)) # Append the feed as a tuple: (title, url)
		my_articles.append({
			'title'       : article_title,
			'url'         : url
			#'date'        : format(date_str),
			#'description' : 'Daily Prayer',
			#'content'     : ''
		})
		# print('GETTING: ' + article_title + ': ' + url)
		
	print('ARTICLE LIST COMPLETED')
	print(my_articles)


	def parse_index(self):
		#print(self.title)
		#print(self.my_articles)
		feeds = []
		feeds.append((self.title, self.my_articles))
		return feeds

	
	def preprocess_raw_html(self, raw_html, url):
		# raw_html holds the raw downloaded JSON string for this article's URL
		#json_data = json.loads(soup.encode('utf-8')) # Decode and parse the JSON string
		print('BEGIN PROCESSING!!!')
		json_data = json.loads(raw_html) # Decode and parse the JSON string
				
    # Process the JSON data and build HTML
		new_html_content = "<html><body>"
		
		morning_prayer = json_data.get("services", {}).get("Morning Prayer", {}).get("readings", [])
		print('MORNING PRAYER: ')
		print(morning_prayer)
		new_html_content += f"<h1>Morning Prayer</h1>" # append, so the opening <html><body> from above is kept
		print('Begin Morning Prayer...')
		for item in morning_prayer:
			#new_html_content += f"<h2>{item['title']}</h2>"
			full = item.get("full", {})
			if full.get("cycle") != "30": # Skip 30 Day Cycle Items.
				print('NAME (morning): ' + full.get('name'))
				new_html_content += f"<h2>{full.get('name')}</h2>"
				text = full.get("text") or "" # guard against a missing text field
				text = text.replace("<html><head></head><body>", "")
				text = text.replace("</body></html>", "")
				#html.unescape(element)
				text = text.replace("\\", "")
				print('TEXT (morning): ' + text)
				new_html_content += f"{text}"
		
		evening_prayer = json_data.get("services", {}).get("Evening Prayer", {}).get("readings", [])
		new_html_content += f"<h1>Evening Prayer</h1>"
		for item in evening_prayer:
			full = item.get("full", {})
			if full.get("cycle") != "30": # Skip 30 Day Cycle Items.
				print('NAME (evening): ' + full.get('name'))
				new_html_content += f"<h2>{full.get('name')}</h2>"
				text = full.get("text") or "" # guard against a missing text field
				text = text.replace("<html><head></head><body>", "")
				text = text.replace("</body></html>", "")
				#html.unescape(element)
				text = text.replace("\\", "")
				print('TEXT (evening): ' + text)
				new_html_content += f"{text}"
			
		new_html_content += "</body></html>"
		print('FEED ITEM CONTENT (morn and eve): ' + new_html_content)
		#return BeautifulSoup(new_html_content, 'html.parser') # Return a new BeautifulSoup object with the HTML content
		#return self.index_to_soup(new_html_content)
		return new_html_content
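
For reference, this is roughly the shape of one day's JSON that the parsing above assumes -- I reconstructed it from the fields the recipe actually touches (services, readings, full, name, cycle, text), so the sample values are only placeholders:

Code:
# Approximate shape of one day's response, as preprocess_raw_html() reads it;
# only services -> <service name> -> readings -> full is actually used.
example_day = {
	"services": {
		"Morning Prayer": {
			"readings": [
				{
					"full": {
						"name": "First Lesson",  # placeholder name, rendered as an <h2>
						"cycle": "60",           # entries with cycle "30" are skipped
						"text": "<html><head></head><body>...</body></html>",  # wrapper tags stripped
					}
				}
			]
		},
		"Evening Prayer": {"readings": []},
	}
}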