#!/bin/sh
#
#       License: GPLv2
#       Author: review (http://www.mobileread.com/forums/member.php?u=88891)
#       Author: fortwienix (http://www.mobileread.com/forums/member.php?u=109732)
#
# Ask the network agent to connect; the feed and the articles are
# downloaded with wget further down.
/ebrmain/bin/netagent connect

# Everything below works with relative paths (including an `rm -rf`), so
# stop right here if the base directory cannot be entered.
if ! cd /mnt/ext1; then
  echo "cannot change to /mnt/ext1" >&2
  /ebrmain/bin/netagent disconnect
  exit 1
fi

# Name of the generated document, e.g. Stern-2024-01-31.html
agency=Stern-$(date +%Y-%m-%d).html

# Start from an empty scratch directory. `rm -rf` is silent when nothing is
# there and also clears a stray non-directory entry of the same name.
rm -rf myrss-tmp
if ! mkdir myrss-tmp || ! cd myrss-tmp; then
  echo "cannot create scratch directory myrss-tmp" >&2
  /ebrmain/bin/netagent disconnect
  exit 1
fi

# Locate the progress bar application. The candidates are checked in order
# and a later hit overrides an earlier one, so a copy in the parent
# directory takes precedence over the one in the tools directory.
# PRG_APP is left untouched when neither candidate exists.
for prg_candidate in \
  /mnt/ext1/applications/tools/progressb.app \
  ../progressb.app
do
  [ -e "$prg_candidate" ] && PRG_APP=$prg_candidate
done

# Make sure the list of article URLs exists (it is appended to later).
touch feeds

# The progress file starts at zero percent.
printf '%s\n' 0 > percent

# Open the output document; the pages are declared as ISO-8859-1.
printf '%s\n' '<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"/></head><body>' > "$agency"

# Reads feed XML on stdin and prints, one per line, the print-version URL
# ("-print.html") of every <link> element that points to an .html page.
# Any "#fragment" at the end of a URL is dropped.
extract_print_links() {
  # put every <link>...</link> element on a line of its own
  sed 's|<link>|\n<link>|g;s|</link>|</link>\n|g' |
    grep '<link>' |
    sed 's|<link>||g;s|</link>||g' |
    # literal ".html" only; an unescaped dot would also accept e.g. "xhtml"
    grep '\.html' |
    sed 's|\.html|-print.html|g;s|#.*$||'
}

# For a list of available feeds see:
# http://www.stern.de/sonst/rss-die-sternde-rss-feeds-517321.html
# To add another feed, insert its URL on a new line above the feed URL
# below. Every line except the last one must end with a backslash directly
# before the line break.
for link in \
http://www.stern.de/feed/standard/all/
do
  # Skip a feed that could not be downloaded instead of parsing whatever
  # happens to be in the file.
  wget -O feed "$link" || continue
  extract_print_links < feed >> feeds
done

# Stream the growing "percent" file into the progress bar application in
# the background. This is skipped when no application was found; otherwise
# the pipeline would end in an empty command and only leave a pointless
# background `tail` behind.
if [ -n "${PRG_APP:-}" ]; then
  tail -f percent | "$PRG_APP" &
fi

# Number of collected article URLs (one per line in "feeds") and the
# counter of articles handled so far.
total=$(grep -c ^ feeds)
current=0
# Prints the cleaned-up article contained in the downloaded page "$1".
# The article starts at the <h1 id="div_article_headline"> element and ends
# before the last <div class="moduleL4"> that follows it.
# Returns 1 without printing anything when either marker is missing.
# Uses article-raw2 and article-raw3 in the current directory as scratch
# files and sets the variables begin and end.
extract_article() {
  # Put a marker line in front of the headline so that its line number can
  # be looked up.
  sed 's|<h1 id="div_article_headline"|START_ARTICLE\n<h1|g' "$1" > article-raw2
  begin=$(grep -n 'START_ARTICLE' article-raw2 | head -1 | awk -F: '{print $1}')
  [ -n "$begin" ] || return 1

  # keep everything after the marker line
  begin=$((begin+1))
  sed -n "${begin},\$p" article-raw2 > article-raw3

  end=$(grep -n '<div class="moduleL4">' article-raw3 | tail -1 | awk -F: '{print $1}')
  [ -n "$end" ] || return 1

  # these are the two divs that are closed immediately after the headline
  echo '<div><div>'
  # write the article content and transform some formatting into something
  # that does not bother fbreader:
  #  - h3..h5 headings become bold text between line breaks
  #  - lines starting an image box (<div class="boxImage" ...) are dropped
  #  - <script> elements are removed (awk reads records ending in </script>
  #    and cuts each record from its first "<script" onwards; the END rule
  #    is kept from the original but cannot fire since RS is never empty)
  sed "${end},\$d" article-raw3 |
    sed 's|<h[3-5]>|<br/><b>|g;s|</h[3-5]>|</b><br/>|g' |
    sed 's|<div class="boxImage"|\nIMGBOX__DIV|' | sed 's|</div>|</div>\n|' | grep -v IMGBOX__DIV |
    awk 'BEGIN{RS="</script>"}/<script/{gsub("<script.*","")}{print}END{if(RS=="")print}'
}

# Download every collected article and append its cleaned-up body to the
# output document, reporting the progress in percent after each step.
while read -r line; do
  current=$((current+1))
  awk -v cur="$current" -v tot="$total" 'BEGIN{printf "%.0f\n", cur*100/tot}' >> percent

  # Drop the previous download so that a failed wget cannot leave the old
  # article behind; -f keeps this quiet when there is nothing to remove.
  rm -f article-raw
  wget -O article-raw "$line"

  # Pages without the expected markers contribute nothing.
  extract_article article-raw >> "$agency"
done < feeds
echo "100" >> percent

# Close the document and move it out of the scratch directory into the
# parent directory.
printf '%s\n' '</body></html>' >> "$agency"
mv "$agency" ../.

# Leave the scratch directory and remove it together with all
# intermediate files.
cd ..
rm -rf myrss-tmp

# Ask the network agent to disconnect again and report success.
/ebrmain/bin/netagent disconnect
exit 0