Hallo,
I am adding my collection of German site files.
I start each file with a comment line containing the name of the file, like:
# de_*.site
This is the list of files:
de_aldi-nord.site
de_bild.site
de_bvb.site
de_cert.site
de_cyberkino.site
de_digitalkamera.site
de_digitv_premiere.site
de_eetimes.site
de_gazette.site
de_heise.site
de_heisec.site
de_heise_aktuell.site
de_heise_mobil.site
de_heise_tp.site
de_heise_tr_aktuell.site
de_heute.site
de_klack-channel.site
de_menshealth.site
de_mobile2day.site
de_palmfaq.site
de_pdassi_news.site
de_pdassi_software.site
de_rn_do.site
de_spiegel.site
de_spiegel_schlagzeilen.site
de_stern.site
de_sz_kultur.site
de_tagesschau.site
de_teltarif.site
de_tvspielfilm.site
de_wortfilter.site
de_yahoo_bvb.site
# de_aldi-nord.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.1, 17.10.03
URL:
http://aldi-nord.de/OFFER_D/home.htm
Description: Aldi Nord Angebote
Name: Aldi-Nord
Levels: 3
AuthorName: Stefan Schwingeler
AuthorEMail: stobs /at/ web . de
ImageURL: .*\.JPG
UseTableSmarts: 0
IssueLinksStart: <!-- Beginn des eigentlichen Fensters 800 x 600 -->
IssueLinksEnd: <!-- Ende -->
ContentsCachable: 0
StoryCachable: 0
MinPages: 1
ContentsURL:
http://aldi-nord.de/OFFER_D/OFFER_\d+/AA_LISTE\.HTM
StoryURL:
http://aldi-nord.de/OFFER_D/OFFER_\d+/OFF\d+\.HTM
ContentsUseTableSmarts: 0
StoryUseTableSmarts: 0
TableRender: keep
# de_bild.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.1, 14.10.03
URL:
http://mobile.bild.t-online.de/index.jsp
Name: Bild.de
Description: German Bild newspaper
AuthorName: Stefan Schwingeler
AuthorEMail: stobs /at/ web . de
Levels: 3
ContentsDiff: 1
ImageURL: .*\.jpg
ContentsCachable: 0
StoryCachable: 1
# de_bvb.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.3, 17.02.04
URL:
http://borussia-dortmund.lycos.de/?Z%1B%E7%F4%9D
Description: Borussia Dortmund News (Soccer)
AuthorName: Stefan Schwingeler
AuthorEMail: stobs /at/ web . de
ImageURL: .*\.jpg
ContentsStart:<!-- CONTENT ANFANG -->
ContentsEnd: <!-- CONTENT ENDE -->
StoryStart: inhalt_header
StoryEnd: d_oben.gif
Name: BvB
Levels: 2
# de_cert.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.2, 12.2.03
URL:
http://cert.uni-stuttgart.de/ticker/sidebar.php
Description: German CERT Infos
AuthorName: Stefan Schwingeler
AuthorEMail: stobs /at/ web . de
Name: CERT RUS
Levels: 2
StoryURL:
http://cert.uni-stuttgart.de/ticker/...e.php\?mid=\d+
StoryStart: <FONT SIZE="+2">
StoryEnd: Copyright © 2003 RUS-CERT, Universität Stuttgart
ContentsDiff: 1
# remove CENTER
StoryPostProcess: {
s/center//gi;
}
# de_cyberkino.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.2, 7.5.03
URL:
http://www.cyberkino.de/entertainment/kino/monate.html
Description: German Cinema Infos
AuthorName: Stefan Schwingeler
AuthorEMail: stobs /at/ web . de
ContentsDiff: 1
ImageURL:
http://www.cyberkino.de/.*\.jpg
Name: Cyberkino
Levels: 2
# de_digitalkamera.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.1, 11.9.03
URL:
http://www.digitalkamera.de/Info/
Name: German Digitalcamera.de News
Description: German Digitalcamera News
Levels: 2
ContentsStart: weiter zur nächsten Seite
ContentsEnd: Diese Seite wurde redaktionell von
ContentsCachable: 0
ContentsDiff: 1
StoryURL:
http://www.digitalkamera.de/Info/News/\d+/\d+\.htm
ImageURL:
http://images.digitalkamera.de/.*\.jpg
StoryStart: <h3>
StoryEnd: "PurpleText" preview="End-Text"
StoryCacheable: 1
StoryLifetime: 2
# de_digitv_premiere.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.8, 16.02.04 14:20
URL:
http://www.digitalfernsehen.de/tv-se...ndex_1687.html
Name: Digitv premiere news
Description: German Premiere Infos
Levels: 2
ContentsStart: <!-- Linke Navigation ENDE -->
ContentsEnd: <!-- Premiere News Snippet Ende -->
ContentsCachable: 0
ContentsDiff: 1
StoryURL:
http://www.digitalfernsehen.de/news/news\_\d+\.html
StoryStart: <!-- Newsbeitrag start -->
StoryEnd: onClick="return printwindow();
StoryCacheable: 1
StoryLifetime: 2
ImageURL:
http://www.digitalfernsehen.de/news/img/.+\.gif
# de_eetimes.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.3, 18.02.04 10:45
URL:
http://www.eetimes.de/hr
Name: EE Times.de
Description: Weltweiter Industrie-Nachrichtendienst für Elektronikingenieure
AuthorName: Stefan Schwingeler
AuthorEMail: stobs /at/ web . de
Levels: 2
ContentsStart: <!-- TOP STORY -->
ContentsEnd: <!-- BLACK TOP BORDER -->
ContentsDiff: 1
StoryURL: .+/news/.+
StoryStart: <!-- TOP STORY -->
StoryEnd: </STORY>
StoryCacheable: 1
# remove javascript pseudo links and <center>
StoryPostProcess: {
s/a href=.javascript://gi;
s/<center>//gi;
}
# de_gazette.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.1, 6.2.03
URL:
http://gazette.de/
Name: Die Gazette
Description: German politics magazine
AuthorName: Stefan Schwingeler
AuthorEMail: stobs /at/ web . de
Levels: 2
ImageURL: .*.jpg
StoryURL: [A-Za-z]\S+\.html
ContentsStart: bordercolor="#CCCCCC"
ContentsEnd: /Archiv/Newsletter.html
StoryToPrintableSub: {
s,([A-Z].+)(\.html),\1-print\2,
s,[A-Z],[a-z],
}
StoryPostProcess: {
s/<center>//gi;
}
# de_heise.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.9, 16.2.03
# this version is with pictures.
URL:
http://www.heise.de/
Name: Heise Newsticker
Description: German Heise IT-news
Levels: 2
ContentsStart: </HEISETEXT>
ContentsEnd: <!-- MITTE (NEWS-UEBERBLICK) -->
ContentsCachable: 0
ContentsDiff: 1
StoryURL: http://www.heise.de/newsticker/(meldung/print|meldung)/\d+
StoryStart: <HEISETEXT>
StoryEnd: </body>
StoryCacheable: 1
StoryLifetime: 2
ImageURL:
http://www.heise.de/bilder/.+
# StoryHeadline: <HEISETEXT>\s+<b>(.+)</b>
StoryToPrintableSub: s,/newsticker/meldung/(\d+),/newsticker/meldung/print/\1,
# de_heisec.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.2, 17.02.04 11:32
URL:
http://www.heise.de/security/
Name: Heise Security
Description: German Heise Security news
Levels: 2
ContentsStart: <!-- Titel -->
# ContentsEnd: <!-- Kaesten -->
ContentsCachable: 0
ContentsDiff: 1
StoryURL: http://www.heise.de/security/(artikel/print|artikel)/\d+
StoryStart: <HEISETEXT>
StoryEnd: <!-- news-steuerung anfang -->
StoryCacheable: 1
ImageURL:
http://heise.de/mobil/artikel/.*/aufmacher\.jpg
StoryToPrintableSub: s,/security/artikel/(\d+),/security/artikel/print/\1,
# de_heise_aktuell.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.3, 16.02.04 10:38
URL:
http://www.heise.de/ct/aktuell/
Name: Heise Aktuell
Description: German Heise Mobil-news
Levels: 2
ContentsStart: <HEISETEXT>
ContentsEnd: </HEISETEXT>
ContentsCachable: 0
ContentsDiff: 1
# StoryURL: http://www.heise.de/ct/aktuell/meldung/\d+
StoryURL: http://www.heise.de/ct/aktuell/(meldung/print|meldung)/\d+
StoryStart: <HEISETEXT>
StoryEnd: </HEISETEXT>
StoryCacheable: 1
ImageURL:
http://www.heise.de/bilder/.*
StoryToPrintableSub: s,/ct/aktuell/meldung/(\d+),/ct/aktuell/meldung/print/\1,
# de_heise_mobil.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.3, 23.5.03
URL:
http://heise.de/mobil/
Name: Heise Mobil
Description: German Heise Mobil-news
Levels: 2
# Ticker ignorieren:
ContentsStart: Themen
ContentsEnd: <!-- MITTE+RECHTS -->
ContentsCachable: 0
ContentsDiff: 1
StoryURL:
http://heise.de/mobil/.*/
StoryStart: <HEISETEXT>
StoryEnd: </HEISETEXT>
StoryCacheable: 1
ImageURL:
http://heise.de/mobil/artikel/.*/aufmacher\.jpg
# remove small font commands
StoryPostProcess: {
s/<font size=1>//gi;
}
# de_heise_tp.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler and Carsten Clasohm, Version 0.6, 20.11.03 09:38
# this version is with pictures.
URL:
http://www.heise.de/tp/pdanews/default.html
Name: Heise Telepolis
Levels: 2
ContentsDiff: 1
ImageURL: .*\.gif
ImageURL: .*\.jpg
}
# de_heise_tr_aktuell.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.1, 16.02.04 10:37
URL:
http://www.heise.de/tr/aktuell/
Name: Heise TR Aktuell
Description: German Heise Technology Review
Levels: 2
ContentsStart: <HEISETEXT>
ContentsEnd: </HEISETEXT>
ContentsCachable: 0
ContentsDiff: 1
StoryURL: http://www.heise.de/tr/aktuell/(meldung/print|meldung)/\d+
StoryStart: <HEISETEXT>
StoryEnd: </HEISETEXT>
StoryCacheable: 1
ImageURL:
http://www.heise.de/bilder/.*
StoryToPrintableSub: s,/tr/aktuell/meldung/(\d+),/tr/aktuell/meldung/print/\1,
# de_heute.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.2, 4.4.04
URL:
http://www.heute.t-online.de/ZDFheut...HOME-4,00.html
Name: heute
Description: German "heute" news
AuthorName: Stefan Schwingeler
AuthorEMail: stobs /at/ web . de
Levels: 2
StoryURL:
http://www.heute.t-online.de/ZDFheut...ticle/.+\.html
ImageURL: .*\.(gif|jpg).*
ContentsCachable: 1
TableRender: list
#SizeLimit: 1000
# remove table commands
# StoryPostProcess: {
# s/<table.+>//gi;
#}
# de_klack-channel.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.1, 17.2.03
URL:
http://www.klack-channel.de/channelTipps.php3?DAY=[[YYYY]][[MM]][[DD]]&USER=
Description: German TV Tipps
AuthorName: Stefan Schwingeler
AuthorEMail: stobs /at/ web . de
Name: Klack Tagestipps
Levels: 1
# remove CENTER
StoryPostProcess: {
s/<?center>?//gi;
}
# de_menshealth.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.4, 24.09.03 10:19
URL:
http://www.menshealth.de/avantgo/
Description: German Men Magazin
AuthorName: Stefan Schwingeler
AuthorEMail: stobs /at/ web . de
Name: Menshealth
Levels: 3
ContentsDiff: 1
ImageURL: .*\.jpg
ContentsStart: HOME</a>
ContentsURL:
http://www.menshealth.de/sixcms/deta...=d_mh_av_home_..
StoryURL:
http://www.menshealth.de/.*/\d+/d_mh_av_detail
# remove small font commands
StoryPostProcess: {
s/<font size=\"?\+?\d\"?>//gi;
}
# de_mobile2day.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.3, 14.10.03
URL:
http://www.mobile2day.de/pdanews_all...ext=&isLimit=1
Name: mobile2day
Description: German PDA-News
AuthorName: Stefan /at/ Schwingeler.de
ContentsDiff: 1
Levels: 2
StoryPostProcess: {
s/<CENTER>//gi;
s/size=\"?\d\"?//gi;
}
# de_palmfaq.site
URL:
http://palmfaq.de
Name: PalmFAQ.de
Levels: 2
ContentsDiff: 1
StoryCacheable: 1
# de_pdassi_news.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.3, 5.3.03
URL:
http://pdassi.de/news1.php
Name: pdassi News
Description: German Palm site
AuthorName: Stefan Schwingeler
AuthorEMail: stobs /at/ web . de
ContentsDiff: 1
Levels: 2
ImageURL:
http://pdassi.de/images/.*
StoryToPrintableSub: s/SID=[a-z0-9]+/SID=1/
StoryPostProcess: {
s/<small>//gi;
}
# de_pdassi_software.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.3, 5.3.03
URL:
http://pdassi.de/wcf/newuploads.php
AddURL:
http://pdassi.de/wcf/newupdates.php
AddURL:
http://pdassi.de/wcf/newprc.php
Name: pdassi Software
Description: German Palm site
AuthorName: Stefan Schwingeler
AuthorEMail: stobs /at/ web . de
ContentsDiff: 1
Levels: 2
ImageURL:
http://.*/util/screenshot.php\?pid=\d+.*
StoryToPrintableSub: s/SID=[a-z0-9]+/SID=1/
StoryPostProcess: {
s/align="center"//gi;
s/<small>//gi;
}
# de_rn_do.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.1, 12.06.2003
URL:
http://www.westline.de/lokal/main.php?link=do%2F%2Fln
Name: RN Do
Levels: 2
ContentsDiff: 1
ImageURL: .*\.gif
ContentsStart: <!--beginn hauptteil-->
ContentsEnd: <!--ende hauptteil-->
StoryURL:
http://www.westline.de/lokal/mono.php.*
StoryStart: <!--beginn hauptteil-->
StoryEnd:<!--ende hauptteil-->
# de_spiegel.site
# This is a sitescooper site file. see
http://sitescooper.cx/
# by Stefan Schwingeler, Version 0.6, 6.2.03
# History:
# "fixed" by L****n Wulff, L****n@multimediaconnection.de
# rewritten with new PDA-link (no pics) by Stefan Schwingeler
URL:
http://www.spiegel.de/dertag/pda/ava...r140=1,00.html
Name: Der Spiegel
Description: German news magazine
AuthorName: Stefan Schwingeler
AuthorEMail: stobs /at/ web . de
Levels: 2
StoryURL:
http://www.spiegel.de/dertag/pda/ava...tikel/.*\.html
# de_spiegel_schlagzeilen.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.1, 18.2.03
URL:
http://www.spiegel.de/schlagzeilen/
Name: Der Spiegel Schlagzeilen
Description: German news magazine
AuthorName: Stefan Schwingeler
AuthorEMail: stobs /at/ web . de
Levels: 2
ContentsStart: mark a:visited
ContentsEnd: OAS_RICH('Right');
StoryURL:
http://www.spiegel.de/.+/\d+,\d+,\d+,\d+\.html
StoryURL:
http://www.spiegel.de/.+/\d+,\d+,druck-\d+,\d+\.html
ImageURL:
http://www.spiegel.de/img/\d+,\d+,\d+,\d+\.jpg
StoryToPrintableSub: s:^(\S+/?\S+/0,\d+,)(\d+,\d+\.html):\1druck-\2:
# de_stern.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.2, 5.3.04
URL:
http://www.stern.de/pda/
#AddURL: http://www.stern.de/pda/?pda=1&rubrik=politik
#AddURL: http://www.stern.de/pda/?pda=1&rubrik=wirtschaft
#AddURL: http://www.stern.de/pda/?pda=1&rubrik=sport
#AddURL: http://www.stern.de/pda/?pda=1&rubrik=kultur
#AddURL: http://www.stern.de/pda/?pda=1&rubrik=computer
#AddURL: http://www.stern.de/pda/?pda=1&rubrik=campus
#AddURL: http://www.stern.de/pda/?pda=1&rubrik=wissenschaft
#AddURL: http://www.stern.de/pda/?pda=1&rubrik=lifestyle
Name: Stern
Levels: 2
ContentsDiff: 1
ImageURL: .*\.jpg
# ContentsURL: http://www.stern.de/pda/\?pda=1\&rubrik=.*
ContentsStart: <strong>Lifestyle</strong>
ContentsEnd: <!-- FOOTER START -->
StoryURL:
http://www.stern.de/.*/index.html\?id=\d+\&pda=1
StoryStart: Beginn des Artikels
StoryEnd: <!-- FOOTER START -->
# StoryHeadline: <div id="artikelKopf1">(.*?)</div>
StoryHeadline: <h1>(.*?)</h1>
# remove bigfont in <h2>
StoryPostProcess: {
s/<h2>//gi;
}
# de_sz_kultur.site
# This is a sitescooper site file. see
http://sitescooper.org/
# Stefan Schwingeler 2.2.04
URL:
http://www.sueddeutsche.de/kultur/ticker/
Name: SZ Kultur
Description: Ressort Münchner Kultur der Süddeutschen Zeitung
Levels: 2
ContentsStart: <!-- beginn content -->
ContentsEnd: <!--ende weiterethemen-->
StoryURL:
http://www.sueddeutsche.de/kultur/artikel/.+
StoryStart: <!-- beginn content -->
StoryEnd: <!-- ende content -->
ImageURL: .*\.jpg
# de_tagesschau.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.1, 6.2.03
URL:
http://www.tagesschau.de/mobileTS
Name: Tagesschau Mobil
Description: German news show
AuthorName: Stefan Schwingeler
AuthorEMail: stobs /at/ web . de
Levels: 3
ImageURL: .*/image/.*\.jpg
SizeLimit: 1000
Level2Cachable: 0
Level3Cachable: 0
Level4Cachable: 0
ContentsCachable: 0
# de_teltarif.site
# This is a sitescooper site file. see
http://sitescooper.tsx.org/
# by Stefan Schwingeler, Version 0.3, 24.02.04 11:12
URL:
http://www.teltarif.de/arch/woche.html
Name: Teltarif
Levels: 2
ContentsDiff: 1
ContentsStart: <!-- Add Ad End -->
StoryURL:
http://www.teltarif.de/arch/\d\d\d\d/kw\d+/s\d+\.html
ImageURL:
http://www.teltarif.de/arch/\d\d\d\d/kw\d+/.+\.jpg
StoryStart: <!-- Add Ad End -->
StoryEnd: Ihre Meinungen und Erfahrungen
# de_tvspielfilm.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler and Carsten Clasohm, Version 1.1, 28.11.03 11:09
# modified by Stefan Schwingeler 25.04.01 11:26: UseTableSmarts: 0
URL:
http://www.tomorrow-newmedia.de/mobi.../tvs/tipps.php
Name: TV-Spielfilm
Levels: 2
ContentsDiff: 0
StoryCachable: 0
StoryURL:
http://www.tomorrow-newmedia.de/mobi...tgo/tvs/gen/.*
ImageURL: .+\.gif
StoryUseTableSmarts: 0
# de_wortfilter.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.2, 18.02.04
URL:
http://www.wortfilter.de/news.html
Name: wortfilter.de
Description: Deutsche eBay Infos von wortfilter.de
AuthorName: Stefan Schwingeler
AuthorEMail: stobs /at/ web . de
Levels: 2
ContentsDiff: 1
StoryURL:
http://www.wortfilter.de/News/news\d+.html
StoryStart: <h1>
StoryEnd: alt="Vorherige Meldung"
# remove center commands
StoryPostProcess: {
s/align="center"//gi;
}
# de_yahoo_bvb.site
# This is a sitescooper site file. see
http://sitescooper.org/
# by Stefan Schwingeler, Version 0.2, 24.09.03 10:19
URL:
http://de.sports.yahoo.com/foot/germ/t/dort
Name: Yahoo BvB
Levels: 2
Description: Yahoo Bvb News
AuthorName: Stefan Schwingeler
AuthorEMail: stobs /at/ web . de
ContentsDiff: 1
ImageURL: .*\.jpg
StoryURL:
http://de.sports.yahoo.com/\d+/\d+/.*.html
ContentsStart: >Verein</a>
ContentsEnd: Durchsuchen Sie das Archiv
ContentsDiff: 1
StoryStart: Tageshöhepunkte
StoryEnd: >Diskutieren Sie über Fußball</a>
# remove small font commands
StoryPostProcess: {
s/<size=-?\+?\d>//gi;
s/<center>//gi;
}
#eof