# Sitescooper site file for "Sitescooper Archive" (2-level site).
# NOTE(review): "ContentsCachable" is spelled differently from the
# "StoryCacheable" key in this same entry; confirm the consumer accepts it.
sitescooper_archive.site: |
  # This is a sitescooper site file. see http://sitescooper.tsx.org/
  # by Stefan Schwingeler, Version 0.1, 09.11.1999
  # Thanks Stefan!
  #
  URL: http://groups.yahoo.com/group/sitescooper-archive/
  Name: Sitescooper Archive
  Levels: 2
  ContentsStart: <font.*>date</font>
  ContentsEnd: </table>
  ContentsCachable: 0
  StoryURL: http://groups.yahoo.com/group/sitescooper-archive/\d+\.html\?
  StoryStart: Subject:
  StoryEnd: alt="Previous"
  StoryCacheable: 1
  # rm center
  StoryPostProcess: {
    s/v?align=center//gim;
  }
# Sitescooper site file for "Sitescooper Latest Changes" (single-level, diffed).
sitescooper_changes.site: |
  URL: http://sitescooper.org/devel/LATEST_CHANGES.html
  Name: Sitescooper Latest Changes
  Description: the Sitescooper development change log
  Levels: 1
  StoryDiff: 1
  UseTableSmarts: 0
  TableRender: flatten
# Sitescooper site file for "BSD Today" (2-level site).
bsdtoday.site: |
  URL: http://www.bsdtoday.com/
  Name: BSD Today
  Description: Your Daily Source for BSD News and Information
  Levels: 2
  UseTableSmarts: 0
  TableRender: flatten
  ContentsStart: <img src="/images/black.gif" width="1" height="550">
  ContentsEnd: <b>Resources</b><br>
  StoryURL: http://www.bsdtoday.com/\d+/.*\d+.html
  StoryStart: <img src="/images/black.gif" width="1" height="550">
  StoryEnd: <b>Please share your comments.</b>
# Sitescooper site file for "OpenBSD Journal" (2-level site).
openbsd_journal.site: |
  URL: http://undeadly.org/
  Name: OpenBSD Journal
  Levels: 2
  AuthorName: Barry Dexter A. Gonzaga
  AuthorEMail: barryg-sitescooper /at/ kssp.upd.edu.ph
  StoryURL: .*action=article.*
  StoryToPrintableSub: s/(sid=\d+$)/\1\&mode=flat/
  ContentsStart: About :
  ContentsEnd: <b>Features</b>
# Sitescooper site file for "O'Reilly Net BSD" (2-level site, follows story links).
oreillynet_bsd.site: |
  URL: http://www.onlamp.com/bsd/
  Name: O'Reilly Net BSD
  Levels: 2
  ContentsStart: -- BSD Lede --
  ContentsEnd: -- digest --
  StoryURL: /pub/a/bsd/[[YYYY]]/\d+/\d+/\S+.html(|\?page=\d+)
  StoryStart: -- content here --
  StoryEnd: -- footer area --
  StoryFollowLinks: 1
# Sitescooper site file for "BusinessWeek Online" (3-level site).
businessweek.site: |
  URL: http://pda.businessweek.com/index.htm
  Name: BusinessWeek Online
  Levels: 3
  AuthorName: Barry Dexter A. Gonzaga
  AuthorEMail: barryg-sitescooper /at/ kssp.upd.edu.ph
  StoryURL: .*\.(htm|html)
  StoryURL: /list/.*\.htm
  StoryURL: /.*/.*/.*/.*\.htm
  StorySkipURL: /ads/contents.htm
  ImageURL: /common_images/.*\.gif
# Sitescooper site file for "CNN Financial" (2-level site).
# NOTE(review): "ContentsCachable"/"StoryCachable" are spelled differently
# from the "StoryCacheable" key used by other entries; confirm the consumer
# accepts this spelling.
cnn_financial.site: |
  # CNN Financial
  URL: http://wireless.cnn.com/avantgo/CNNMONEY/en/channel.html
  # created from PODS file by David A. Desrosiers
  AuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr>
  Name: CNN Financial
  Levels: 2
  ImageURL: .*\.gif
  ImageScaleToMaxWidth: 150
  ContentsCachable: 0
  StoryURL: http://wireless.cnn.com/avantgo/CNNMONEY/en/stories/.*
  StoryCachable: 1
# Sitescooper site file for "Fucked Company".
# NOTE(review): the leading "# AuthorName: jm" is kept as a comment, as it
# appeared after a "#" in the collapsed text; confirm it was not a directive.
fuckedcompany.site: |
  # AuthorName: jm
  #
  # I love this site, just for the author's pure schadenfreude!
  #
  URL: http://www.fuckedcompany.com/
  Name: Fucked Company
  Description: the dot-com deadpool
  StoryStart: <img src="images/recent_groove.gif" width=402 height=2></td>
  StoryEnd: <td> <a href="archives">View more headlines</a></td>
# Sitescooper site file for "Industry Week" (2-level site).
industry_week.site: |
  URL: http://www.industryweek.com/avantgo/
  Name: Industry Week
  Levels: 2
  ContentsPrint: 1
  ImageURL: http://.*
  #
  # This site was converted from an AvantGo .subs file by subs-to-site.pl.
  # See http://sitescooper.org/ for more information on sitescooper.
# Sitescooper site file for "The Motley Fool" (RSS contents page).
motley-fool.site: |
  URL: http://www.fool.com/xml/foolnews_rss091.xml
  Name: The Motley Fool
  Description: To Educate, Amuse, and Enrich
  ContentsFormat: rss
  StoryURL: /.*\.htm
  StoryEnd: <A NAME="NUMBERS">
  StoryStart: <BODY
  # as dictated in http://www.fool.com/help/FoolsRules.htm
  Rights: Copyright 1996-2000 The Motley Fool. All rights reserved.
  MinPages: 2
# Sitescooper site file for "Economist" (2-level site, printer-friendly stories).
the_economist.site: |
  URL: http://www.economist.com/index.html?nonNA=1
  Name: Economist
  Description: Economist
  AuthorName: Goh Boon Nam
  # Version 1.2
  # Date updated : 30 Dec 2004
  # Changes made : Change of URL + Remove Subscription-only pages which cause problem to Plucker
  Levels: 2
  ContentsStart: <td colspan="7" width="447" valign="top">
  ContentsEnd: Only one answer is correct
  ContentsUseTableSmarts: 0
  StoryToPrintableSub: s!displayStory.cfm!PrinterFriendly.cfm!
  StoryURL: http://www.economist.com/(.*?)/PrinterFriendly.cfm(.*?)
  #This image is the icon to indicate story not available
  ImageURL: http://www.economist.com/images/dingbats/e5.gif
  ContentsHTMLPreProcess: {
    s/align="right"//gim;
    s/align="center"//gim;
    s/align=right//gim;
    s/align=center//gim;
  }
  StoryHTMLPreProcess: {
    s/align="right"//gim;
    s/align="center"//gim;
    s/align=right//gim;
    s/align=center//gim;
    s/<div id="wholepage" style="visibility: hidden">(.*?)<\/noscript>//gis;
  }
# Sitescooper site file for "Lazarus at Large" (2-level site, diffed contents).
lazarus_at_large.site: |
  # site_samples/business/lazarus_at_large.site
  #
  # SF Chronicle Columnists : David Lazarus, "Lazarus at Large"
  # by Akkana Peck
  URL: http://sfgate.com/cgi-bin/search/columnists.cgi?waisdbname=/chronicle/&byline=David+Lazarus
  Name: Lazarus at Large
  Levels: 2
  ContentsStart: <INPUT TYPE="submit" VALUE="View Archive">
  ContentsDiff: 1
  StoryURL: http://sfgate.com/cgi-bin/article.cgi.*
  StoryStart: <!-- end #additionalcontent -->
  StoryEnd: <!-- END STORY -->
# Sitescooper site file for "Dark Horizons" (single-level site).
darkhorizons.site: |
  # Author: MMiller /at/ media-general.com
  URL: http://www.darkhorizons.com/news-n.htm
  Name: Dark Horizons
  Levels: 1
  ContentsStart: UPDATE:
  ContentsEnd: HR WIDTH=40%
  StoryCacheable: 0
  ContentsDiff: 0
# Sitescooper site file for "Roger Ebert One-Minute Reviews" (2-level site).
ebert_1min.site: |
  # roger_ebert_1min.site
  # AuthorName: Alan Hoyle <alan@alanhoyle.com>
  #
  # Reads the Roger Ebert One Minute Movie Reviews
  URL: http://www.suntimes.com/output/minmovie/movie0.html
  Name: Roger Ebert One-Minute Reviews
  Description: Roger Ebert's One-Minute Movie Reviews
  Levels: 2
  Category: Daily
  ContentsStart: Begin Content
  ContentsEnd: End Content
  StoryURL: .*ebert_reviews/.*\.html
  StoryCacheable: 1
  StoryHeadline: <h2>(.*)</h2>
  StoryStart: Begin Review
  StoryEnd: End Content
  Rights: Copyright © Chicago Sun-Times Inc.
# Sitescooper site file for "Roger Ebert: Movie Answer Man" (2-level site).
ebert_answer_man.site: |
  # ebert_answer_man.site
  # Roger Ebert's Movie Answer Man weekly Q&A column
  URL: http://www.suntimes.com/index/answ-man.html
  Name: Roger Ebert: Movie Answer Man
  Description: Roger Ebert's Movie Answer Man weekly Q&A column
  Levels: 2
  ContentsStart: <!-- Begin Content -->
  StoryURL: .*answ-man/.*\.html
  StoryCacheable: 1
  StoryHeadline: <h2>(.*)</h2>
  StoryStart: <!-- Begin Content -->
  StoryEnd: <!-- End Content -->
# Sitescooper site file for "Roger Ebert: Interviews-essays-festivals" (2-level site).
ebert_features.site: |
  # ebert_features.site
  # Roger Ebert's Movie Feature Articles
  URL: http://www.suntimes.com/index/ebfeatures.html
  Name: Roger Ebert: Interviews-essays-festivals
  Description: Roger Ebert's movie feature articles
  Levels: 2
  ContentsStart: <!-- Begin Content -->
  StoryURL: .*eb-feature/.*\.html
  StoryCacheable: 1
  StoryHeadline: <h2>(.*)</h2>
  StoryStart: <!-- Begin Content -->
  StoryEnd: <!-- End Content -->
# Sitescooper site file for "Roger Ebert: The Great Movies" (2-level site, diffed contents).
ebert_great_movies.site: |
  # ebert_great_movies.site
  # Roger Ebert's "The Great Movies"
  URL: http://www.suntimes.com/ebert/greatmovies/index.html
  Name: Roger Ebert: The Great Movies
  Description: Roger Ebert's regular "The Great Movies" feature
  Levels: 2
  ContentsStart: <!-- Begin Content -->
  ContentsDiff: 1
  StoryURL: .*ebert/greatmovies/.*\.html
  StoryCacheable: 1
  StoryHeadline: <h[12]>(.*)</h[12]>
  StoryStart: <!-- Begin Content -->
  StoryEnd: <!-- End Content -->
# Sitescooper site file for "The Filthy Critic" (single-level site).
filthy_critic.site: |
  URL: http://bigempire.com/filthy/
  Name: The Filthy Critic
  Levels: 1
  StoryStart: <TD WIDTH="440" VALIGN="TOP">
  StoryEnd: </HTML>
# Sitescooper site file for "IMDB Movie/TV news" (single-level site).
imdb_studio_briefing.site: |
  # IMDB.com Movie/TV news
  # Author: Jan Lund Thomsen <kwed@kwed.org>
  URL: http://us.imdb.com/StudioBrief/
  Name: IMDB Movie/TV news
  Levels: 1
  AuthorName: Jan Lund Thomsen
  AuthorEmail: kwed@kwed.org
  StoryStart: <!-studiodate -->
  StoryEnd: <A HREF="mailto:studiobrf@aol.com">Studio Briefing</A> Edited by <A HREF="http://members.aol.com/studiobrf/lewirwin/lewsbio.html">Lew Irwin</A>
# Sitescooper site file for "Roger Ebert Reviews" (2-level site).
roger_ebert.site: |
  # roger_ebert.site
  # AuthorName: Justin Henry <jhenry@fjicl.com>
  # Modified: Alan Hoyle <alan /at/ alanhoyle.com>
  #
  # Modified to read the Ebert review index page, and to deal
  # with a new SunTimes page format
  # Modified to exclude extraneous bottom of page stuff.
  URL: http://www.suntimes.com/index/ebert1.html
  Name: Roger Ebert Reviews
  Description: Roger Ebert's Movie Reviews
  Levels: 2
  Category: Daily
  ContentsStart: <!-- Begin Content -->
  ContentsEnd: End Content
  StoryURL: .*ebert1/.*\.html
  StoryCacheable: 1
  StoryHeadline: <h[12]>(.*)</h[12]>
  StoryStart: <!-- Begin Content -->
  StoryEnd: End Content
# Sitescooper site file for "Variety.Com" (2-level site).
variety.site: |
  URL: http://www.variety.com/channel
  Name: Variety.Com
  Levels: 2
  ContentsPrint: 1
  ImageURL: http://.*
  #
  # This site was converted from an AvantGo .subs file by subs-to-site.pl.
  # See http://sitescooper.org/ for more information on sitescooper.
# Sitescooper site file for the "Apartment 3-G" comic (image-only site).
apartment_3g.site: |
  URL: http://www.kingfeatures.com/features/comics/apt3g/aboutMaina.php
  Name: Apartment 3-G
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Apartment_3-G.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Baby Blues" comic (image-only site).
# NOTE(review): this entry has no StoryStart/StoryEnd directives; confirm
# that is intentional.
baby_blues.site: |
  URL: http://www.kingfeatures.com/features/comics/babyblue/aboutMaina.php
  Name: Baby Blues
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Baby_Blues.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Barney Google and Snuffy Smith" comic (image-only site).
barney_google_and_snuffy_smith.site: |
  URL: http://www.kingfeatures.com/features/comics/bgoogle/aboutMaina.php
  Name: Barney Google and Snuffy Smith
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Barney_Google.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Beetle Bailey" comic (image-only site).
beetle_bailey.site: |
  URL: http://www.kingfeatures.com/features/comics/bbailey/aboutMaina.php
  AuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr>
  Name: Beetle Bailey
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: by <!--CMS NAME="author"
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Beetle_Bailey.*
  ImageScaleToMaxWidth: 500
# Sitescooper site file for "The Better Half" comic (image-only site).
better_half.site: |
  URL: http://www.kingfeatures.com/features/comics/bethalf/aboutMaina.php
  Name: The Better Half
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Better_Half.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Between Friends" comic (image-only site).
between_friends.site: |
  URL: http://www.kingfeatures.com/features/comics/bfriends/aboutMaina.php
  Name: Between Friends
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Between_Friends.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Blondie" comic (image-only site).
blondie.site: |
  URL: http://www.kingfeatures.com/features/comics/blondie/aboutMaina.php
  Name: Blondie
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Blondie.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Boondocks" comic (image-only site).
# NOTE(review): "UseTableSmarts: 0" followed a commented-out directive in the
# collapsed text; it is restored as an active line — confirm.
boondocks.site: |
  URL: http://www.ucomics.com/boondocks/
  AuthorName: Ignatz Sol [iggy /at/ mechanolatry.com]
  Name: Boondocks
  StoryStart: <!--- comics view content --->
  StoryEnd: <!--calendar-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://images.ucomics.com/comics/bo/200\d/bo.*
  #ImageScaleToMaxWidth: 450
  UseTableSmarts: 0
# Sitescooper site file for the "Buckles" comic (image-only site).
buckles.site: |
  URL: http://www.kingfeatures.com/features/comics/buckles/aboutMaina.php
  Name: Buckles
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Buckles.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Calvin and Hobbes" comic (image-only site).
# NOTE(review): the "?did not work?" StoryStart is kept as a single comment
# line, and StoryEnd is restored as an active directive — confirm.
calvin_and_hobbes.site: |
  URL: http://www.ucomics.com/calvinandhobbes/viewch.cfm
  AuthorName: Marko Bozikovic <redbyron /at/ fly.srk.fer.hr> modified by Gary Paulson
  Name: Calvin and Hobbes
  StoryStart: <!-- end comic nav -->
  # ?did not work? StoryStart: \gtimg src="http://a828.g.akamai.net
  StoryEnd: <!--calendar-->
  ImageOnlySite: 1
  ImageURL: .*/ch/\d\d\d\d/ch.*\.gif
  ImageScaleToMaxWidth: 550
  StoryHTMLPreProcess: {
    s!<a href..http.//www.ucomics.com/shopping/buycomic.cfm.uc_fn=1.uc_full_date=\d+?.uc_daction.X.uc_comic=ch.>!!gsi;
  }
# Sitescooper site file for the "Crock" comic (image-only site).
crock.site: |
  URL: http://www.kingfeatures.com/features/comics/crock/aboutMaina.php
  Name: Crock
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Crock.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Curtis" comic (image-only site).
curtis.site: |
  URL: http://www.kingfeatures.com/features/comics/curtis/aboutMaina.php
  Name: Curtis
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Curtis.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Dennis the Menace" comic (image-only site).
dennis_the_menace.site: |
  URL: http://www.kingfeatures.com/features/comics/dennis/aboutMaina.php
  AuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr>
  Name: Dennis the Menace
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: by <!--CMS NAME="author"
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Dennis_The_Menace.*
  ImageScaleToMaxWidth: 500
# Sitescooper site file for the "Dilbert" comic (image-only site).
dilbert.site: |
  URL: http://www.dilbert.com/comics/dilbert/
  AuthorName: Kevin L. Dupree <kdupree /at/ flash.net>
  Name: Dilbert
  StoryStart: COMIC STRIP BEGIN
  StoryEnd: COMIC STRIP END
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://www.dilbert.com/comics/dilbert/archive/images/.*
  ImageScaleToMaxWidth: 450
  UseTableSmarts: 0
  # add size info so sitescooper knows to make it into a
  # link for Plucker.
  StoryHTMLPreProcess: {
    s/ALT="Today.s Dilbert Comic"/ ALT="Today.s Dilbert Comic" WIDTH=600 HEIGHT=211 /gs;
  }
# Sitescooper site file for "The Dinette Set" comic (image-only site).
dinette_set.site: |
  URL: http://www.kingfeatures.com/features/comics/dinette/aboutMaina.php
  Name: The Dinette Set
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Dinette_Set.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Doonesbury" comic (image-only site).
# NOTE(review): "UseTableSmarts: 0" followed a commented-out directive in the
# collapsed text; it is restored as an active line — confirm.
doonesbury.site: |
  URL: http://www.doonesbury.com/strip/dailydose/index.html
  AuthorName: Ignatz Sol [iggy /at/ mechanolatry.com]
  Name: Doonesbury
  StoryStart: no next date
  StoryEnd: dose_feature_menu4_01.gif
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://images.ucomics.com/comics/db/200\d/db.*
  #ImageScaleToMaxWidth: 500
  UseTableSmarts: 0
# Sitescooper site file for the "Edge City" comic (image-only site).
edge_city.site: |
  URL: http://www.kingfeatures.com/features/comics/edgecity/aboutMaina.php
  Name: Edge City
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Edge_City.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Family Circus" comic (image-only site).
family_circus.site: |
  URL: http://www.kingfeatures.com/features/comics/familyc/aboutMaina.php
  Name: Family Circus
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://www.kingfeatures.com/features/comics/familyc/fct.*\.gif
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ rustymail.com
# Sitescooper site file for the "Flash Gordon" comic (image-only site).
flash_gordon.site: |
  URL: http://www.kingfeatures.com/features/comics/fgordon/aboutMaina.php
  Name: Flash Gordon
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://www.kingfeatures.com/features/comics/fgordon/fg.*\.gif
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Funky Winkerbean" comic (image-only site).
funky_winkerbean.site: |
  URL: http://www.kingfeatures.com/features/comics/fwinker/aboutMaina.php
  Name: Funky Winkerbean
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Funky_Winkerbean.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Grin and Bear It" comic (image-only site).
grin_and_bear_it.site: |
  URL: http://www.kingfeatures.com/features/comics/grinbear/aboutMaina.php
  Name: Grin and Bear It
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Grin_and_Bear_It.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Hagar the Horrible" comic (image-only site).
hagar_the_horrible.site: |
  URL: http://www.kingfeatures.com/features/comics/hagar/aboutMaina.php
  AuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr>
  Name: Hagar the Horrible
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: by <!--CMS NAME="author"
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Hagar_The_Horrible.*
  ImageScaleToMaxWidth: 450
# Sitescooper site file for the "Hazel" comic (image-only site).
hazel.site: |
  URL: http://www.kingfeatures.com/features/comics/hazel/aboutMaina.php
  Name: Hazel
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://www.kingfeatures.com/features/comics/hazel/hat.*\.gif
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Henry" comic (image-only site).
henry.site: |
  URL: http://www.kingfeatures.com/features/comics/henry/aboutMaina.php
  Name: Henry
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://www.kingfeatures.com/features/comics/henry/het.*\.gif
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Hi and Lois" comic (image-only site).
hi_and_lois.site: |
  URL: http://www.kingfeatures.com/features/comics/hi_lois/aboutMaina.php
  Name: Hi and Lois
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Hi_and_Lois.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Judge Parker" comic (image-only site).
judge_parker.site: |
  URL: http://www.kingfeatures.com/features/comics/jparker/aboutMaina.php
  Name: Judge Parker
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Judge_Parker.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for "The Katzenjammer Kids" comic (image-only site).
katzenjammer_kids.site: |
  URL: http://www.kingfeatures.com/features/comics/katzkids/aboutMaina.php
  Name: The Katzenjammer Kids
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://www.kingfeatures.com/features/comics/katzkids/kk.*\.jpg
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for "The Lockhorns" comic (image-only site).
lockhorns.site: |
  URL: http://www.kingfeatures.com/features/comics/lockhorn/aboutMaina.php
  Name: The Lockhorns
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Lockhorns.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Mallard Fillmore" comic (image-only site).
mallard_fillmore.site: |
  URL: http://www.kingfeatures.com/features/comics/mallard/aboutMaina.php
  Name: Mallard Fillmore
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Mallard_Fillmore.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Mandrake the Magician" comic (image-only site).
mandrake_the_magician.site: |
  URL: http://www.kingfeatures.com/features/comics/mandrake/aboutMaina.php
  Name: Mandrake the Magician
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://www.kingfeatures.com/features/comics/mandrake/mmt.*\.gif
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Mark Trail" comic (image-only site).
mark_trail.site: |
  URL: http://www.kingfeatures.com/features/comics/mtrail/aboutMaina.php
  Name: Mark Trail
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Mark_Trail.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Marvin" comic (image-only site).
marvin.site: |
  URL: http://www.kingfeatures.com/features/comics/marvin/aboutMaina.php
  Name: Marvin
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Marvin.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Mary Worth" comic (image-only site).
mary_worth.site: |
  URL: http://www.kingfeatures.com/features/comics/mworth/aboutMaina.php
  Name: Mary Worth
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Mary_Worth.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Moose and Molly" comic (image-only site).
moose_and_molly.site: |
  URL: http://www.kingfeatures.com/features/comics/moosemol/aboutMaina.php
  Name: Moose and Molly
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://www.kingfeatures.com/features/comics/moosemol/mot.*\.gif
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Mutts" comic (image-only site).
mutts.site: |
  URL: http://www.kingfeatures.com/features/comics/mutts/aboutMaina.php
  Name: Mutts
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Mutts.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for "The Norm" comic (image-only site).
norm.site: |
  URL: http://www.kingfeatures.com/features/comics/thenorm/aboutMaina.php
  Name: The Norm
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Norm.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "On The Fastrack" comic (image-only site).
on_the_fastrack.site: |
  URL: http://www.kingfeatures.com/features/comics/fastrack/aboutMaina.php
  Name: On The Fastrack
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Fast_Track.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for "The Phantom" comic (image-only site).
phantom.site: |
  URL: http://www.kingfeatures.com/features/comics/phantom/aboutMaina.php
  Name: The Phantom
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Phantom.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for "The Piranha Club" comic (image-only site).
piranha_club.site: |
  URL: http://www.kingfeatures.com/features/comics/piranha/aboutMaina.php
  Name: The Piranha Club
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Piranha.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Popeye" comic (image-only site).
# NOTE(review): AuthorName is given twice (original author and reviser);
# confirm which value the consumer keeps.
popeye.site: |
  URL: http://www.kingfeatures.com/features/comics/popeye/aboutMaina.php
  AuthorName: Marko Bozikovic <redbyron /at/ fly.srk.fer.hr>
  Name: Popeye
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Popeye.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean (revision)
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Prince Valiant" comic (image-only site).
prince_valiant.site: |
  URL: http://www.kingfeatures.com/features/comics/pvaliant/aboutMaina.php
  Name: Prince Valiant
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://www.kingfeatures.com/features/comics/pvaliant/val.*\.gif
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Redeye" comic (image-only site).
redeye.site: |
  URL: http://www.kingfeatures.com/features/comics/redeye/aboutMaina.php
  Name: Redeye
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Redeye.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Rex Morgan M.D." comic (image-only site).
rex_morgan_md.site: |
  URL: http://www.kingfeatures.com/features/comics/rmorgan/aboutMaina.php
  Name: Rex Morgan M.D.
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Rex_Morgan.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Rhymes With Orange" comic (image-only site).
rhymes_with_orange.site: |
  URL: http://www.kingfeatures.com/features/comics/orange/aboutMaina.php
  Name: Rhymes With Orange
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Rhymes_with_Orange.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Safe Havens" comic (image-only site).
safe_havens.site: |
  URL: http://www.kingfeatures.com/features/comics/safehavn/aboutMaina.php
  Name: Safe Havens
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Safe_Havens.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Sally Forth" comic (image-only site).
sally_forth.site: |
  URL: http://www.kingfeatures.com/features/comics/sforth/aboutMaina.php
  Name: Sally Forth
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Sally_Forth.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Sam and Silo" comic (image-only site).
sam_and_silo.site: |
  URL: http://www.kingfeatures.com/features/comics/sam_silo/aboutMaina.php
  Name: Sam and Silo
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://www.kingfeatures.com/features/comics/sam_silo/sst.*\.gif
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Sherman's Lagoon" comic (image-only site).
shermans_lagoon.site: |
  URL: http://www.kingfeatures.com/features/comics/lagoon/aboutMaina.php
  Name: Sherman's Lagoon
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Shermans_Lagoon.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Six Chix" comic (image-only site).
six_chix.site: |
  URL: http://www.kingfeatures.com/features/comics/sixchix/aboutMaina.php
  Name: Six Chix
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/6Chix.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Slylock Fox" comic (image-only site).
slylock_fox.site: |
  URL: http://www.kingfeatures.com/features/comics/slylock/aboutMaina.php
  Name: Slylock Fox
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Slylock.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for "The Amazing Spiderman" comic (image-only site).
spiderman.site: |
  URL: http://www.kingfeatures.com/features/comics/spidermn/aboutMaina.php
  AuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr>
  Name: The Amazing Spiderman
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: by <!--CMS NAME="author"
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Spiderman.*
  ImageScaleToMaxWidth: 500
# Sitescooper site file for the "Steve Roper and Mike Nomad" comic (image-only site).
steve_roper_and_mike_nomad.site: |
  URL: http://www.kingfeatures.com/features/comics/sroper/aboutMaina.php
  Name: Steve Roper and Mike Nomad
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Steve_Roper.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Ted Rall" comic (image-only site).
# NOTE(review): "UseTableSmarts: 0" followed a commented-out directive in the
# collapsed text; it is restored as an active line — confirm.
tedrall.site: |
  URL: http://www.ucomics.com/rallcom/
  AuthorName: Ignatz Sol [iggy /at/ mechanolatry.com]
  Name: Ted Rall
  StoryStart: no next date
  StoryEnd: Get Ted Rall by e-mail
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://images.ucomics.com/comics/trall/200\d/tr.*
  #ImageScaleToMaxWidth: 450
  UseTableSmarts: 0
# Sitescooper site file for the "They'll Do It Every Time" comic (image-only site).
theyll_do_it_every_time.site: |
  URL: http://www.kingfeatures.com/features/comics/theydoit/aboutMaina.php
  Name: They'll Do It Every Time
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/TDIE.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for "This Modern World" (single-level, diffed).
thismodernworld.site: |
  URL: http://www.thismodernworld.com/
  Name: This Modern World
  Description: This Modern World by Tom Tomorrow
  Levels: 1
  StoryDiff: 1
  # thx to Adrian Colley <aecolley AT spamcop net>
# Sitescooper site file for the "Tiger" comic (image-only site).
tiger.site: |
  URL: http://www.kingfeatures.com/features/comics/tiger/aboutMaina.php
  Name: Tiger
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Tiger.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Trudy" comic (image-only site).
trudy.site: |
  URL: http://www.kingfeatures.com/features/comics/trudy/aboutMaina.php
  Name: Trudy
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://www.kingfeatures.com/features/comics/trudy/trt.*\.gif
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Tumbleweeds" comic (image-only site).
tumbleweeds.site: |
  URL: http://www.kingfeatures.com/features/comics/tumblewd/aboutMaina.php
  Name: Tumbleweeds
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Tumbleweeds.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "User Friendly" comic (image-only site).
user_friendly.site: |
  URL: http://www.userfriendly.org/static/
  AuthorName: Kevin L. Dupree <kdupree /at/ flash.net>
  Name: User Friendly
  StoryStart: <!--Start Current Strip-->
  StoryEnd: <!--End Strip-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: /cartoons/archives/.*\.gif
  ImageScaleToMaxWidth: 550
# Sitescooper site file for the "Zippy The Pinhead" comic (image-only site).
zippy_the_pinhead.site: |
  URL: http://www.kingfeatures.com/features/comics/zippy/aboutMaina.php
  Name: Zippy The Pinhead
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Zippy_the_Pinhead.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for the "Zits" comic (image-only site).
zits.site: |
  URL: http://www.kingfeatures.com/features/comics/zits/aboutMaina.php
  Name: Zits
  StoryStart: <!--CMS NAME="image"-->
  StoryEnd: <!--/CMS-->
  StoryDiff: 1
  ImageOnlySite: 1
  ImageURL: http://est.rbma.com/content/Zits.*
  ImageScaleToMaxWidth: 500
  AuthorName: Yoon Fui Thean
  AuthorEmail: yoonfui /at/ bigfoot.com
# Sitescooper site file for "World New York" (2-level site).
world_new_york.site: |
  URL: http://www.worldnewyork.net/
  Name: World New York
  Description: Links to, and extracts from, quality writing on the web
  Levels: 2
  ContentsStart: <!-- Weblog entries -->
  ContentsEnd: !-- Link to RSS Syndication page -->
  StoryURL: http://www.worldnewyork.net/comments.php\?id=\S+
  StoryStart: <div class="lgbody"><p>
  StoryEnd: <h4>COMMENTS</h4>
  # site file author details
  AuthorName: Justin Mason
  AuthorEmail: jm@jmason.org
  # This site gets bonus points for linking to the palm version as the
  # "AvantGo/SiteScooper/Palm Version" in its early days ;)
# Sitescooper site file for "The Internet Oracle" (2-level site).
oracularities.site: |
  URL: http://www.cs.indiana.edu/hyplan/oracle/latest.html
  Name: The Internet Oracle
  Levels: 2
  ContentsStart: <body>
  ContentsEnd: </body>
  StoryURL: http://www.cs.indiana.edu/hyplan/oracle/digests/.*\.html
  StoryStart: <body>
  StoryEnd: </body>
  StoryHTMLPreProcess: {
    s/<form/<ignore/g;
    s/<\/form>/<\/ignore>/g;
  }
  MinPages: 2
# Sitescooper site file for "Wingmail Daily" (single-level, date-templated URL).
wingmail.site: |
  # From: artwells <artwells@artwells.com>
  URL: http://www.artwells.com/oracula/web/serve-wing.php?wingrequest=[[YYYY]][[MM]][[DD]]
  Name: Wingmail Daily
  Levels: 1
# Sitescooper site file for "GamaSutra" (2-level site, follows story links).
gamasutra_features.site: |
  URL: http://www.gamasutra.com/features/
  Name: GamaSutra
  Levels: 2
  ContentsStart: .BeginEditable "content"
  ContentsEnd: -- .BeginLibraryItem "/Library/.*_footer.lbi" --
  StoryStart: -- .BeginEditable "main.20content"
  StoryEnd: -- .BeginLibraryItem "/Library/.*_footer.lbi" --
  # We only read linked stories for features, not for newswire items.
  # ah shaggit, let's get the newswires too.
  StoryURL: .*(features|newswire)/.*\.htm.*
  # Need to follow links into other story pages
  StoryFollowLinks: 1
  StoryHeadline: <title>Gamasutra - \S+ - (.*?) \[.*?\]\s*</title>
# Sitescooper site file for "GameDev.net" (RSS contents page).
gamedev_net.site: |
  URL: http://www.gamedev.net/xml/
  Name: GameDev.net
  Description: Maximum Game Development!
  ContentsFormat: rss
  StoryURL: /info/news/fullstory.asp.*
# Sitescooper site file for "Linux Game Tome" (2-level site).
happypenguin.site: |
  URL: http://www.happypenguin.org/news
  Name: Linux Game Tome
  Description: The latest Linux game news
  Levels: 2
  ContentsStart: <form method="GET" action="http://happypenguin.org/news">
  ContentsEnd: <a href="http://happypenguin.org/news?start=10">
  StoryURL: http://.*happypenguin.org/show.*
  StoryStart: <tr bgcolor=#000080><td width="20" valign=top align=left><img src="http://happypenguin.org/images/tl.gif" width=20 height=20 alt=""></td>
  StoryEnd: </HTML>
  ContentsUseTableSmarts: 0
  StoryUseTableSmarts: 0
  TableRender: flatten
# Sitescooper site file for "Old Man Murray" (2-level site, follows story links).
oldmanmurray.site: |
  URL: http://www.oldmanmurray.com/
  Name: Old Man Murray
  Description: Game news and reviews with a thoroughly nasty flavour
  TableRender: flatten
  Levels: 2
  ContentsStart: Make sure to check to the left for all the latest on OldManMurray.com</SMALL></TD>
  StoryURL: http://www.oldmanmurray.com/(features|shortreviews|longreviews|seanbaby)/.*html.*
  StoryURL: http://www.oldmanmurray.com/realnews.wcs
  StoryFollowLinks: 1
  StoryStart: src="http://www.oldmanmurray.com/logoimages/ugologo\S+.gif"
bofh-2k+1.site:
# Sitescooper site definition for the 2001 BOFH series on The Register.
URL: http://www.theregister.co.uk/content/30/25244.html
Name: 2001: A BOFH Odyssey
Description: Bastard Operator From Hell: 2001 Edition
AuthorName: Barry Dexter A. Gonzaga
AuthorEmail: barryg /at/ kssp.upd.edu.ph
Levels: 2
StoryURL: /content/archive/\d+\.html
ContentsStart: <HR>
ContentsEnd: <BR></DIV>.<DIV><IMG.SRC=
StoryStart: <HR>
StoryEnd: <BR></DIV>.<DIV><IMG.SRC=
# Turn the site's styled DIVs into heading tags and double <br>s into paragraphs.
StoryHTMLPreProcess: {
  s/<DIV CLASS=.storyhead.>(.*?)<\/DIV>/<H2>$1<\/H2>/is;
  s/<DIV CLASS=.storybyline.>(.*?)<\/DIV>/<H3>$1<\/H3>/is;
  s/<DIV CLASS=.indexposted.>(.*?)<\/DIV>/<H3>$1<\/H3>/is;
  s/<DIV CLASS=.storybody.><b>(.*?)<\/b>/<H4>$1<\/H4>/is;
  s/<br>.<br>(.*?)<br>.<br>/<\/p><p>$1<\/p><p>/gs;
}
# Drop the first doubled <b>/<i> opener and the stray </p> after the H4.
StoryPostProcess: {
  s/<b><b>//is;
  s/<i><i>//is;
  s/<\/H4>.<\/p>/<\/H4>/is;
}
bofh-2k.site:
# Sitescooper site definition for the 2000 BOFH series on The Register.
URL: http://www.theregister.co.uk/content/30/15804.html
Name: BOFH 2K: The Kit and caboodle
Description: Bastard Operator From Hell: 2000 Edition
AuthorName: Barry Dexter A. Gonzaga
AuthorEmail: barryg /at/ kssp.upd.edu.ph
Levels: 2
StoryURL: /content/\d+/\d+\.html
ContentsStart: <HR>
ContentsEnd: <BR></DIV>.<DIV><IMG.SRC=
StoryStart: <HR>
StoryEnd: <BR></DIV>.<DIV><IMG.SRC=
# Turn the site's styled DIVs into heading tags and double <br>s into paragraphs.
StoryHTMLPreProcess: {
  s/<DIV CLASS=.storyhead.>(.*?)<\/DIV>/<H2>$1<\/H2>/is;
  s/<DIV CLASS=.storybyline.>(.*?)<\/DIV>/<H3>$1<\/H3>/is;
  s/<DIV CLASS=.indexposted.>(.*?)<\/DIV>/<H3>$1<\/H3>/is;
  s/<DIV CLASS=.storybody.><b>(.*?)<\/b>/<H4>$1<\/H4>/is;
  s/<br>.<br>(.*?)<br>.<br>/<\/p><p>$1<\/p><p>/gs;
}
# Drop the first doubled <b>/<i> opener and the stray </p> after the H4.
StoryPostProcess: {
  s/<b><b>//is;
  s/<i><i>//is;
  s/<\/H4>.<\/p>/<\/H4>/is;
}
bofh.site:
# Bastard Operator from Hell
URL: http://www.theregister.co.uk/content/30/index.html
Name: BOFH
Levels: 2
StoryURL: /content/\d+/\d+\.html
StoryCacheable: 1
MinPages: 2
StoryUseTableSmarts: 0
ContentsUseTableSmarts: 0
ContentsStart: <IFRAME SRC=.http://ad.uk.doubleclick.net/
ContentsEnd: <TD WIDTH="150" ALIGN="right" VALIGN="top">
# Promote the story_head DIV to an H2, cut everything from the "Related ..."
# trailer onwards, then tidy <br> tags.
# NOTE(review): in "<br>+" the quantifier applies only to ">", so the third
# substitution does not collapse repeated <br> tags; "(?:<br>)+" was probably
# intended -- confirm before changing.
StoryHTMLPreProcess: {
  s/<DIV CLASS=.story_head.>(.*?)<\/DIV>/<H2 CLASS='story_head'>$1<\/H2>/is;
  s/<br>.<br><B>Related (?:[sS]tory|[sS]tories|[lL]ink|[lL]inks)<\/B>.*\Z//s;
  s/<br>+/<br>/i;
  s/<br><p>(?:<br>)*/<p>/i;
}
AuthorName: Robert Edmonds <stu@brainfood.com>
bofh_archive.site:
# Bastard Operator from Hell official archive
URL: http://bofh.ntk.net/Bastard.html
AuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr>
Name: Bastard Operator from Hell
Levels: 2
StoryURL: .*\.html
StoryCachable: 1
dave_barry.site:
# Sitescooper site definition for Dave Barry's Miami Herald column.
URL: http://www.miami.com/mld/miamiherald/living/columnists/dave_barry/
Name: Dave Barry
Description: Dave Barry's column for the Miami Herald
AuthorName: (update) Alan Hoyle <alan /at/ alanhoyle.com>
Levels: 2
ContentsStart: <td class="smalltitle" nowrap="nowrap">LATEST COLUMN
ContentsEnd: rightrail
#StoryURL: .*/dave_barry/.*\.htm
#StoryURL: .*/gift_guide/.*\.htm
StoryURL: .*\.htm
StoryStart: begin body-content
StoryEnd: end body-content
StoryHeadline: <h1>(.*?)</h1>
# Remove separator cells, the ADVERTISEMENT label, and whole lines that
# mention the listed navigation keywords.
ContentsHTMLPreProcess: {
  s/(<td><hr size=\"1\" color=\"\#cccccc\" width=\"98\%\"><\/td>)//gm;
  s/(ADVERTISEMENT)//gm;
  s/^.*(Get in touch).*$//gm;
  s/^.*(davebarry).*$//gm;
  s/^.*(weird_news).*$//gm;
  s/^.*(vertdotline).*$//gm;
}
# Remove whole lines containing "byline" or "Read more".
StoryHTMLPreProcess: {
  s/^.*(byline).*$//gm;
  s/^.*(Read more).*$//gm;
}
jon_carroll.site:
# site_samples/humor/jon_carroll.site
#
# San Francisco Chronicle : Columnists : Jon Carroll
URL: http://www.sfgate.com/columnists/carroll/
Name: Jon Carroll
Levels: 2
AuthorName: Jan Lund Thomsen
AuthorEmail: kwed@kwed.org
ContentsStart: <!-- \*\*\*\*\* BEGIN COLUMN RESULTS HERE \*\*\*\*\* -->
ContentsEnd: <!-- \*\*\*\*\* END COLUMN RESULTS HERE \*\*\*\*\* -->
StoryURL: http://www.sfgate.com/cgi-bin/article.cgi.*
# Append the printable-view parameter to every story URL.
StoryToPrintableSub: s/(.+)/$1\&type=printable/
StoryStart: <hr size="1" align="left">
#StoryStart: <!-- end #additionalcontent -->
#StoryEnd: <!-- END STORY -->
pigdog.site:
# Pigdog Journal
URL: http://www.pigdog.org/pigdog.rdf
Name: Pigdog Journal
Description: The Online Handbook of Bad People of the Future
ContentsFormat: rss
StoryURL: /.*.s?html?
StoryStart: Feedback<br>
StoryEnd: <td background="images/rightborder.gif">
ContentsStart: <item>
AuthorName: Robert Edmonds <stu@brainfood.com>
satirewire.site:
# Sitescooper site definition for SatireWire.
URL: http://www.satirewire.com/
Name: SatireWire
Description: New Satire for the New Economy
Levels: 2
ContentsStart: <table border="0" cellpadding="4" cellspacing="2" align="right" width="125">
ContentsEnd: <a href=".top">Back to Top</a>
ContentsUseTableSmarts: 0
StoryURL: http://www.satirewire.com/(../)?(briefs|features|news)/\S+html
StoryStart: ======= BODY =======
StoryEnd: Start of Recommend-It Code
StoryHeadline: <title>SatireWire \| (.*?)</title>
# Strip a leading two-character path segment (e.g. "../") from story URLs.
URLProcess: {
  s,http://www.satirewire.com/../,http://www.satirewire.com/,;
}
the_onion.site:
# the_onion.site
# includes all stories on far left, but not the images.
# includes Statshot and Infographic images
URL: http://www.theonion.com/
Name: The Onion
Levels: 3
IssueLinksStart: <FRAMESET
IssueLinksEnd: <NOFRAMES>
UseTableSmarts: 0
ContentsStart: <!-- Side-News Content End Begin -->
ContentsEnd: <!-- STATshot Link End -->
ContentsURL: http://www.theonion.com/onion\d+/index\d*\.html
# Cut the span between the "News Archives Begin" and "News-In-Brief Begin" markers.
ContentsHTMLPreProcess: {
  s/<.-- News Archives Begin -->.*?<.-- News-In-Brief Begin -->//gs;
}
StoryStart: Header End -->
StoryEnd: Footer Begin -->
StoryURL: .*\.html
# there's a link button in the left column, along with all the archives
StorySkipURL: http://www.theonion.com/(info/onion_link.html|archive/.*)
# comment out these 2 lines to exclude the statshot and infographic images
ImageURL: .*statshot.*\.gif
ImageURL: .*infograph.*\.gif
MinPages: 2
javaworld.site:
# Javaworld.site
# Thanks to Glenn Proctor <glenn@docproc.com> and Lim Swee Tat <st_lim@3ui.com>
URL: http://www.javaworld.com/
Name: JavaWorld
Levels: 2
Description: Java programming articles.
# Mark contents
ContentsStart: -- content cell --
ContentsEnd: -- end content cell --
StoryURL: /javaworld.*/j.*/j.*\.html
# Set StoryToPrintableSub appropriately - since there's only one modification
# this is fairly straightforward ...
ImageURL: /javaworld.*/j.*/.*.gif
StoryToPrintableSub: s,\.html,_p.html,
# Only get the text between the comments
StoryStart: -- begin body text --
StoryEnd: -- end body text --
# Set the story headline for bookmarks
StoryHeadline: <TITLE>(.*?) - JavaWorld.*</TITLE>
merlyns_columns.site:
# Sitescooper site definition for Randal Schwartz' magazine columns.
URL: http://www.stonehenge.com/merlyn/LinuxMag/
AddURL: http://www.stonehenge.com/merlyn/WebTechniques/
Name: Randal Schwartz' columns
Description: columns written by Randal Schwartz, perl hacker
Levels: 2
ContentsDiff: 1
ContentsStart: <h2>Columns</h2>
ContentsEnd: </html>
TableRender: flatten
# hmm, we don't have a decent .txt->.html renderer yet, so skip the
# listings :(
StoryURL: /merlyn/\S+/col\d+\.html
StoryStart: <!-- INDEX END -->
StoryEnd: </html>
# Put a <br> in front of indented "=N=" markers so each starts its own line.
StoryHTMLPreProcess: {
  s/^\s+(=\d+=)/<br>$1/gm;
}
php_net.site:
# php.net
# Author: Hubidubi (hubidubi@freemail.hu)
URL: http://www.php.net
Name: PHP.net
StoryStart: <!--http://www.php.net/-->
StoryEnd: Old News
use_perl.site:
# use_perl.site -- based on the slashdot site.
URL: http://use.perl.org/index.pl?light=1&noboxes=1&noicons=1
Name: use Perl
Description: All The Perl That's P7L to E5T and R4T
Levels: 2
MinPages: 2
ContentsStart: <HTML>
ContentsEnd: </HTML>
ContentsDiff: 1
StoryURL: http://use.perl.org/article.*
StoryStart: <A href="submit.pl">Submit Story</A> \]</P>
StoryEnd: <FORM METHOD="GET" ACTION="http://use.perl.org/search.pl">
# strip out the "login" and "related links" tables, they're irrelevant offline!
# added Feb 2 2000 jm
#
StoryHTMLPreProcess: {
  s,<H3>use Perl Login</H3>.*?<B>The Fine Print:</B>,</FONT></TD></TR></TABLE>,s;
}
URLProcess: {
  # fix the URL; trim out all comment settings and use our own.
  s{^(http://use.perl.org/article.pl\?sid=\d+/\d+/\d+/\d+).*}
   {$1\&light=1\&noboxes=1\&noicons=1\&mode=nested\&threshold=0}g;
  # has to include these two; block it if it does not
  # (the if-body stays on one line so no lone "}" appears inside this block)
  if (!m,^http://use.perl.org/index.pl.light=1\&noboxes=1\&noicons=1,
      && !/mode=nested\&threshold=0/) { undef $_; }
}
# skip URLs that have been archived
StorySkipURL: http://use.perl.org/interviews/\d+/\d+/\d+/\d+.shtml
layouts.site:
# This is a site layouts file. Layouts are used to provide default information
# on a given site's layout, providing StoryStart, StoryEnd, etc. parameters for
# a given URL pattern.
#
# One advantage of this is that (for example) if an article in Slashdot links to
# a Wired News article, the latter page can be scooped as well, without the
# slashdot.site file knowing about Wired News' page format.
#
# This file should always be kept in the sites directory!
# ---------------------------------------------------------------------------
LayoutURL: http://www.wired.com/news/.*
# layout for Wired News pages. Wired seems to change this every week!!, and
# it's really messy right now. :(
#
StoryHeadline: <h1 class="lg">(.*?)</h1>
StoryStart: (?i)<input type="submit" value="Go" class="blkbtn" />
StoryEnd: (?i)-- (END_OF_BODY|end content) --
ContentsStart: -- begin generic desk --
ContentsEnd: (?i)-- (end content|TRADES|FOOTER) --
StoryURL: http://www.wired.com/news/.+/[\d,]+\.html\S*
StoryURL: http://r.wired.com/r/\d+/http://www.wired.com/news/.+/[\d,]+\.html\S*
# I hate StoryServer. This is just ludicrous
StoryToPrintableSub: s!(http://www.wired.com/news)/\S+/(\d+),\d+,([\d,]+\.html\S*)$!$1/print/$2,1294,$3!
# ---------------------------------------------------------------------------
LayoutURL: http://news.cnet.com/news/.*
StoryStart: <font size="+2">
StoryEnd: <font face="Arial, Helvetica" size="-1">
StoryURL: http://news.cnet.com/news/\S+.html.*
StoryToPrintableSub: s!(http://news.cnet.com/news/[-\d]+.html)?.*$!$1!
# ---------------------------------------------------------------------------
LayoutURL: http://news.bbc.co.uk/.*
ContentsStart: <a name="startcontent">
ContentsEnd: SEARCH BBC NEWS
StoryStart: <a name="startcontent">
StoryEnd: SEARCH BBC NEWS
StoryURL: http://news.bbc.co.uk/.*low/.*stm.*
# ---------------------------------------------------------------------------
LayoutURL: http://quote.yahoo.com/q\?.*
Levels: 1
StoryStart: Create New View
ImageURL: http://chart.yahoo.com/c/.*\.gif
StoryEnd: Quotes delayed 15 minutes for Nasdaq, 20 minutes otherwise.
StoryHeadline: colspan=7><b>(.*?)</b>
StoryCacheable: 0
UseTableSmarts: 0
# Re-flow the quote table text: move each field label onto the same line as
# its value and squeeze runs of blank lines.
StoryPostProcess: {
  s/\x8D//g;
  s/- More[^\n]*\n//g;
  s/\]//g;
  s/(Last Trade|Change|Prev Cls|Volume|Div Date|Day's Range|Bid|Ask|Open) *\n/\n$1 /g;
  s/(Avg Vol|Ex-Div|52-week Range|Earn\/Shr|P\/E|Mkt Cap|Div\/Shr|Yield) *\n/\n$1 /g;
  s/\n([ \t]*\n)+/\n\n/g;
}
# ---------------------------------------------------------------------------
LayoutURL: http://www.wunderground.com/cgi-bin/findweather/getForecast\?.*
StoryStart: Updated
StoryEnd: <!-- blue credits bar -->
UseTableSmarts: 0
StoryCacheable: 0
StoryPostProcess: {
  s/\n\n+/\n/g;
  s/Moon Phase//g;
  s/\x8D /\x8D/g;
  s/: /Updated: /;
  s/Add.*?cast\)//gs;
  s/\s+(Forecast as)/\n\x8D$1/g;
  s/\n(State Extended)/\n\x8D$1/g;
  s/\n(Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Today|Tonight|Tomorrow)/\n\n\x8D$1/gs;
  s/\nTemperature *Probability of Precipitation\n//gs;
  s/Place/\n\n\x8DSummary\n/gs;
}
# ---------------------------------------------------------------------------
# New York Times
# Site file for Sitescooper (http://jmason.org/software/sitescooper/)
# Written by: Kennis Koldewyn <kennis.koldewyn@wcom.com>
# bits from: Andy Rabagliati <andyr@wizzy.com>
# Last updated: 2000-09-05
LayoutURL: http://.*.nytimes.com/.*
ContentsStart: </NYT_HEADER
ContentsEnd: <NYT_FOOTER
StoryStart: </NYT_HEADLINE
StoryEnd: </NYT_TEXT
# this is a default StoryURL, the sites can override it
StoryURL: http://.*.nytimes.com/\d\d\d\d/\d\d/\d\d/.*
# Contents pre-processing:
ContentsHTMLPreProcess: {
  # Remove bogus absolute links:
  s/http:\/\/www.nytimes.com//gis;
}
# Story pre-processing:
StoryHTMLPreProcess: {
  # Remove lists of online links, inline tables, inline images, etc.:
  s/<NYT_INLINEBLURB.*?<\/?NYT_INLINEBLURB>//gs;
  s/<NYT_INLINEIMAGE.*?<\/?NYT_INLINEIMAGE>//gs;
  s/<NYT_INLINETABLE.*?<\/?NYT_INLINETABLE>//gs;
  s/<NYT_LINKS_ONSITE.*?<\/?NYT_LINKS_ONSITE>//gs;
  s/<NYT_LINKS_OFFSITE.*?<\/?NYT_LINKS_OFFSITE>//gs;
  # Remove other NYT-specific tags:
  s/<\/?NYT_.*?>//gm;
  # Remove break after headlines:
  s/<\/H(\d)>\s*<BR>/<\/H$1>/gim;
}
# ---------------------------------------------------------------------------
# Accuweather defaults, by Marko Bozikovic <marko.bozikovic /at/ envox.hr>
LayoutURL: http://www.accuweather.com/adcbin/intlocal_index.*
ImageURL: http://vortex.accuweather.com/iwxpage/adc/icons/.*
StoryStart: <!-- 5 DAY HTML START -->
StoryEnd: <!-- 5 DAY HTML END -->
StoryCachable: 0
UseTableSmarts: 0
TableRender: keep
AuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr>
zen_stories.site:
# Zen stories
URL: http://www.rider.edu/users/suler/zenstory/zenstory.html
AuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr>
Name: Zen stories
Levels: 2
ContentsEnd: Do you have a good story to contribute
StoryURL: .*\.html
StorySkipURL: http://www.rider.edu/users/suler/zenstory/zenframe.html
StorySkipURL: http://www.rider.edu/users/suler/zenstory/sendfriend.html
StoryStart: <(HEAD|head)>
StoryEnd: <(h|H)5>People's reactions
StoryCachable: 1
advogato.site:
# Sitescooper site definition for Advogato articles.
URL: http://www.advogato.org/article/
Name: Advogato
Description: the free software developer's advocate
Levels: 2
ContentsStart: <html>
ContentsEnd: </html>
ContentsDiff: 1
StoryURL: http://www.advogato.org/article/.*
StoryStart: <html>
StoryEnd: </html>
advogato_diaries.site:
# Sitescooper site definition for the Advogato recent diary log.
URL: http://www.advogato.org/recentlog.html
Name: Advogato Diaries
Description: the free software developer's advocate
Levels: 2
# specify quite a large range for the contents page, we want to pick up the "recent
# log entries" link without having to AddURL it separately!
#
ContentsStart: <h1>Recent .*?</h1>
ContentsEnd: </html>
ContentsDiff: 1
StoryURL: http://www.advogato.org/article/.*
StoryStart: <html>
StoryEnd: </html>
alan_cox_diary.site:
# Sitescooper site definition for Alan Cox's diary page.
URL: http://www.linux.org.uk/diary/
Name: Alan Cox Diary
Description: the daily diary of Alan Cox, kernel hacker extraordinaire
StoryStart: <!-- mark -->
StoryEnd: </HTML>
StoryDiff: 1
debian_weekly_news.site:
# Sitescooper site definition for the current Debian Weekly News issue.
URL: http://www.debian.org/News/weekly/current/issue/
Name: Debian Weekly News
StoryStart: <H1>
StoryEnd: To receive this newsletter weekly in your mailbox
StoryURL: http://www.debian.org/News/weekly/current/issue/
Levels: 1
desktoplinux.site:
# converted to use Palm format site, URL thanks to
# http://members.bellatlantic.net/~blumax/plink.html !
#
URL: http://www.desktoplinux.com
Name: DesktopLinux
Levels: 2
# ContentsStart: <html>
# ContentsEnd: </html>
ContentsDiff: 1
# StoryStart: Tell your friends
# StoryEnd: </html>
StoryURL: http://www.desktoplinux.com/.*.html
footnotes.site:
# Footnotes
# Author: Hubidubi (hubidubi@freemail.hu)
URL: http://www.gnomedesktop.org/modules.php?op=modload&name=AvantGo&file=index
Name: Gnome FootNotes
Levels: 2
# If you don't want the logo, comment this line out!
ImageURL: http://www.gnomedesktop.org/.*\.png
# Drop the literal "Date" and "Article" labels from the output.
StoryPostProcess: {
  s/Date//gm;
  s/Article//gm;
}
freshmeat.site:
# AuthorName: Carsten Clasohm
# Last modified 2000/2/20
#
# (moved by jm to replace the original freshmeat.site)
URL: http://freshmeat.net/
Name: Freshmeat
StoryStart: <TABLE CELLSPACING="0" CELLPADDING="3"
StoryEnd: \[more articles/news\]
StoryDiff: 1
gwn.site:
# Sitescooper site definition for the Gentoo Weekly Newsletter.
URL: http://www.gentoo.org/news/en/gwn/gwn.xml
Name: Gentoo Weekly Newsletter
Levels: 2
AuthorName: Barry Dexter A. Gonzaga
AuthorEMail: barryg-sitescooper /at/ kssp.upd.edu.ph
StoryURL: /news/en/gwn/.*-newsletter\.xml
ImageURL: /images/gwn/.*
ImageURL: /images/.*
kc_kde.site:
# Sitescooper site definition for the KDE Kernel Cousin printable page.
URL: http://kt.zork.net/kde/latest_print.html
Name: KC - KDE
Levels: 1
TableRender: list
StoryStart: <li><strong>Threads Covered<\/strong>
StoryEnd: </HTML>
StoryIncludeStartPattern: 1
AuthorName: Adrian Burgess <adrian /at/ corrosive.freeserve.co.uk>
# Remove the numbered table cells that end in a "(N posts)" cell.
StoryHTMLPreProcess: {
  s/<td><b>.*<\/b>\.<\/td><td>.*<\/td><td>\(.* posts\)<\/td>//gmi;
}
kde-news.site:
# KDE Announcement and news (with comments)
# revised by jm@jmason.org to just go straight to dot.kde.org, it's pretty much
# taken over as far as I can see.
# Author: Jarl Friis <jarl@diku.dk>
URL: http://dot.kde.org/
Name: KDE news
Levels: 2
AuthorName: Jarl Friis
AuthorEmail: jarl@diku.dk
ContentsIncludeStartPattern: 0
ContentsIncludeEndPattern: 0
ContentsStart: <!-- begin contents -->
ContentsEnd: <!--end contents-->
StoryURL: http://dot.kde.org/\d+/.*
StoryStart: <!--mark demarcation2-->
StoryEnd: <FORM METHOD="POST" ACTION="http://dot.kde.org/search">
ContentsDiff: 1
kernel_cousin_debian.site:
# Sitescooper site definition for the Debian Kernel Cousin printable page.
URL: http://kt.zork.net/debian/latest_print.html
Name: Debian Kernel Cousin
Levels: 1
StoryStart: Table Of Contents
StoryEnd: </html>
# AuthorName: "michael d. ivey" <ivey@gweezlebur.com>
# new URL courtesy of: "David A. Desrosiers" <hacker@gnu-designs.com>
kernel_traffic.site:
# Sitescooper site definition for the Kernel Traffic printable page.
URL: http://www.kerneltraffic.org/kernel-traffic/latest_print.html
Name: Kernel Traffic
Levels: 1
StoryStart: Table Of Contents
StoryEnd: </html>
# fixed by Derek Glidden <dglidden /at/ illusionary.com>
kerneltrap.site:
# Sitescooper site definition for kerneltrap.com (AvantGo entry page).
URL: http://www.kerneltrap.com/avantgo.php
Name: kerneltrap.com
Description: Your source for all the news that is the Linux kernel
AuthorName: David Desrosiers
StoryURL: http://kerneltrap.com/print.php.*
StoryEnd: This article comes from kerneltrap.com
linux_gazette.site:
# About time I did this one! Linux Gazette, Jan 25 2000 <jm /at/ jmason.org>.
#
URL: http://www.linuxgazette.com/lg_frontpage.html
Name: Linux Gazette
Description: Linux Gazette... making Linux just a little more fun! (monthly)
Levels: 3
IssueLinksStart: BEGIN issues
IssueLinksEnd: END issues
ContentsURL: /issue\d+/lg_\S+\.html
ContentsStart: BEGIN toc
ContentsEnd: END toc
StoryURL: /issue\d+/\S+\.html
StoryStart: END navbar
StoryEnd: END copyright
linux_magazine.site:
# Sitescooper site definition for Linux Magazine.
URL: http://www.linux-mag.com/
Name: Linux Magazine
Levels: 2
ContentsStart: <TD CLASS="FEATURES" BGCOLOR="CCFFCC" ALIGN="CENTER">
ContentsEnd: <TD CLASS="NEWS" BGCOLOR="CCFFCC" ALIGN="CENTER">
StoryURL: http://www.linux-mag.com/\d+-\d+/\S+.html.*
StoryURL: http://www.linux-mag.com/cgi-bin/printer.pl.issue=\d+-\d+.article=.*
# Rewrite /<issue>/<article>_<n>.html into the printer.pl URL for that article.
StoryToPrintableSub: s,/(\d+-\d+)/(\S+)_\d+\.html,/cgi-bin/printer.pl\?issue=$1\&article=$2,
StoryStart: <BODY
StoryEnd: </html>
linuxdevices.site:
# Sitescooper site definition for the LinuxDevices.com news page.
URL: http://www.linuxdevices.com/news/
Name: LinuxDevices.com
Description: the embedded Linux portal
Levels: 2
ImageURL: /images/readmore.gif
ContentsStart: Best match</option>
ContentsEnd: More *Links *...</a>
StoryURL: http://www.linuxdevices.com/(news|links|events|articles)/.*\.html
StoryStart: Best match</option>
StoryEnd: <b>Latest headlines:</b>
TableRender: flatten
slashdot.site:
# Slashdot.site -- now including comments scored 3 or higher.
# TODO: strip out the so-called "funny" comments ;)
#
# Kornelis Sietsma <korny /at/ sietsma.com>: comments support
# jm: fixed again to use light mode throughout
# bms: minor changes to pick up ask.slashdot.org it.slashdot.org
URL: http://slashdot.org/index.pl?light=1&noboxes=1&noicons=1
Name: SlashDot
Levels: 2
ContentsStart: <A href=/hof.shtml>hof</A>
ContentsEnd: <P><P>\[ <FONT size=2><B>
StoryURL: http://slashdot.org/article.*
StoryURL: (http://.*.slashdot.org/article.*|http://slashdot.org/article.*)
StoryStart: <A href=/hof.shtml>hof</A>
StoryEnd: <P>\[ <FONT size=2><B>
# strip out the "login" and "related links" tables, they're irrelevant offline!
# added Feb 2 2000 jm
#
StoryHTMLPreProcess: {
  s,<H3>Slashdot Login</H3>.*?<B>The Fine Print:</B>,</FONT></TD></TR></TABLE>,s;
}
# Because slashdot has so many links allowing views of stories with different
# comment levels, formats, etc., we need a way to fix or block them here.
# Unfortunately it's a bit tricky so we need to use perl code. We could just
# ignore the comments, but I guess that's missing the point of slashdot ;)
# added May 18 2000 jm
#
URLProcess: {
  # fix the URL; trim out all comment settings and use our own.
  s{^(http://slashdot.org/article.pl\?sid=\d+/\d+/\d+/\d+).*}
   {$1\&light=1\&noboxes=1\&noicons=1\&mode=nested\&threshold=3}g;
  s{^(http://.*.slashdot.org/article.pl\?sid=\d+/\d+/\d+/\d+).*}
   {$1\&light=1\&noboxes=1\&noicons=1\&mode=nested\&threshold=3}g;
  # has to include these two; block it if it does not
  # (the if-body stays on one line so no lone "}" appears inside this block)
  if (!m,^http://slashdot.org/index.pl.light=1\&noboxes=1\&noicons=1,
      && !/mode=nested\&threshold=3/) { undef $_; }
}
# skip URLs that have been archived
StorySkipURL: http://slashdot.org/interviews/\d+/\d+/\d+/\d+.shtml
StoryHeadline: <TITLE>Slashdot \| (.*?)</TITLE>
a_word_a_day.site:
# Sitescooper site definition for A.Word.A.Day.
URL: http://wordsmith.org/words/
Name: A.Word.A.Day
Levels: 1
StoryStart: RealAudio\s*</A>
StoryEnd: </BODY>
drinkboy.site:
# The Drinkboy Channel
URL: http://www.drinkboy.com/offline/index.html
AuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr>
Name: The Drinkboy Channel
Levels: 3
ImageURL: http://www.drinkboy.com/offline/images/.*gif
IssueLinksStart: <html>
IssueLinksEnd: </html>
IssueCacheable: 0
ContentsURL: http://www.drinkboy.com/offline/.*html
ContentsStart: <html>
ContentsEnd: </html>
ContentsCachable: 0
StoryURL: http://www.drinkboy.com/offline/Recipes/.*html
StoryStart: <html>
StoryEnd: </html>
StoryCachable: 0
world_wide_words.site:
# Sitescooper site definition for World Wide Words.
URL: http://www.worldwidewords.org/
Name: World Wide Words
Description: Investigating international English from a British viewpoint
Levels: 2
ContentsStart: Q and A
ContentsEnd: JOIN THE MAILING LIST
ContentsUseTableSmarts: 0
# urgh, titles are images. Use their alt tags
UseAltTagForURL: /img/.*\.gif
# get all articles -- but not the indexes (which are huge)
StoryURL: /(qa|topicalwords|inbrief|articles|weirdwords|turnsofphrase|reviews)/.*\.htm.*
StorySkipURL: /(qa|topicalwords|inbrief|articles|weirdwords|turnsofphrase|reviews)/index\.htm
StoryStart: BODY TEXT AREA
StoryEnd: World Wide Words is copyright
StoryUseTableSmarts: 0
TableRender: flatten
wired_news_business.site:
# Sitescooper site definition for the Wired News business section.
URL: http://www.wired.com/news/news/business/
Name: Wired News Business
Levels: 2
StoryURL: .*//www.wired.com/news/(business|print)/[\d,]+\.html.*
wired_news_culture.site:
# Sitescooper site definition for the Wired News culture section.
URL: http://www.wired.com/news/news/culture/
Name: Wired News Culture
Levels: 2
StoryURL: .*//www.wired.com/news/(culture|print)/[\d,]+\.html.*
wired_news_politics.site:
# Sitescooper site definition for the Wired News politics section.
URL: http://www.wired.com/news/politics/
Name: Wired News Politics
Levels: 2
StoryURL: .*//www.wired.com/news/(politics|print)/[\d,]+\.html.*
wired_news_tech.site:
# Sitescooper site definition for the Wired News technology section.
URL: http://www.wired.com/news/news/technology/
Name: Wired News Technology
Levels: 2
StoryURL: .*//www.wired.com/news/(technology|print)/[\d,]+\.html.*
ContentsDiff: 1
USNews.site:
URL: http://www.usnews.com/usnews/issue/home.htm
Name: USNews-Tue
Description: USNews
AuthorName: Goh Boon Nam
# US News and World Report
# Version 1.2
# Date updated : 5 Apr 2005
# Changes for 1.1 : Change of page format
# Changes for 1.2 : Change of StoryURL
Levels: 2
ContentsStart: <!-- DATE -->
ContentsEnd: <!-- Begin Ad1 -->
StoryURL: http://www.usnews.com/usnews/(.*?)/articles/.*
StoryStart: <div class="articleSectionTitle">
StoryEnd: <!-- E-MAIL FORM -->
StoryFollowLinks: 1
# Strip alignment attributes, non-breaking spaces and clearing <br> tags.
# NOTE(review): the non-breaking-space pattern was an invisible blank in the
# flattened copy; it is written out here to match both the &nbsp; entity and
# the raw \xA0 character -- confirm against the upstream site file.
StoryHTMLPreProcess: {
  s/align="right"//gim;
  s/align="center"//gim;
  s/align=right//gim;
  s/align=center//gim;
  s/(?:&nbsp;|\xA0)//gim;
  s/<br clear="all">//gim;
  s/<p>\\n(.*?)//gim;
}
atlantic.site:
# The Atlantic
# Originally by Akkana Peck
URL: http://www.theatlantic.com/
Name: The Atlantic
Levels: 2
UseTableSmarts: 0
ContentsStart: <font class="rubric">
StoryURL: http://www.theatlantic.com/issues/[\d]+/[\d]+/.*\.htm
StoryURL: http://www.theatlantic.com/unbound/.*\.htm
StoryStart: <!--ARTICLE CONTENT BEGINS-->
cnn_mobile.site:
# CNN Mobile
URL: http://wireless.cnn.com/avantgo/cnn/index.html
# created from PODS file by David A. Desrosiers
AuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr>
Name: CNN Mobile
Levels: 2
ImageURL: .*\.gif
ImageScaleToMaxWidth: 150
ContentsCachable: 0
StoryURL: http://wireless.cnn.com/avantgo/cnn.*
StoryCachable: 1
newsweek.site:
URL: http://www.msnbc.com/news/nw-front_front.asp?0dm=s---k
Name: Newsweek
Description: Newsweek
AuthorName: Goh Boon Nam
# Version 1.2
# Date updated : 24 Nov 2003
# Changes made : StoryEnd changed to work better for all articles
Levels: 2
ContentsStart: ---Insert_Tertiary_Stories---
ContentsEnd: nwk_hp_header_webex.gif
StoryURL: http://www.msnbc.com/news/\d+\.asp
StoryStart: bantop_(.*?).gif
StoryEnd: (<b>MSNBC READER(.*?)S TOP 10<\/b>|Newsweek, Inc)
StoryFollowLinks: 1
# Strip alignment attributes and replace the international-editions header
# image with plain text.
ContentsHTMLPreProcess: {
  s/align="right"//gim;
  s/align="center"//gim;
  s/align=right//gim;
  s/align=center//gim;
  s/<img src="http:(.*?)nwk_hp_header_inted.gif">/International Editions/gim;
}
# Strip alignment attributes and the MPA template span, then turn
# non-breaking spaces into paragraph breaks.
# NOTE(review): the non-breaking-space patterns were invisible blanks in the
# flattened copy; they are written out here to match both the &nbsp; entity
# and the raw \xA0 character -- confirm against the upstream site file.
StoryHTMLPreProcess: {
  s/align="right"//gim;
  s/align="center"//gim;
  s/align=right//gim;
  s/align=center//gim;
  s/MPA NEW TEMPLATE CODE STARTS HERE(.*?)END MPA NEW TEMPLATE CODE HERE//gis;
  s/<font style="font-family:arial;font-size:13px(.*?)(?:&nbsp;|\xA0)/<P>/gis;
  s/(?:&nbsp;|\xA0)/<P>/gis;
}
newsweek_intl.site:
URL: http://www.msnbc.com/news/nw-int_front.asp?
Name: NewsweekIntl-Tue
Description: Newsweek International
AuthorName: Goh Boon Nam
# Version 1.6
# Date updated : 27 Jun 2005
# Updated by : Goh Boon Nam
# Changes made : Removal of Page Header that appears in some pages
# : Workaround for relative URL not recognised by Sitescooper nextpage function
# : New StoryEnd
# : New ContentsEnd to remove hanging <b tag which causes all stories to be in bold
Levels: 2
ContentsStart: FROM THIS WEEK'S ISSUE
ContentsEnd: <b color="......."> FROM THE PREVIOUS ISSUE
StoryURL: http://www.msnbc.msn.com/id/.*
StoryStart: (class="headlineStory"|class="deckStory")
StoryEnd: (©(.*?)Newsweek, Inc|Print this)
# urgh, first article title is an image. Use its alt tag
#UseAltTagForURL: http://(.*?).jpg
# NOTE(review): in both blocks below the non-breaking-space pattern was an
# invisible blank in the flattened copy; it is written out to match both the
# &nbsp; entity and the raw \xA0 character. The other non-ASCII characters are
# kept exactly as found -- confirm all of them against the upstream site file.
ContentsHTMLPreProcess: {
  s/align="right"//gim;
  s/align="center"//gim;
  s/align=right//gim;
  s/align=center//gim;
  s/Â//gim;
  s/—/--/gim;
  s/•/<BR>/gim;
  s/(?:&nbsp;|\xA0)//gim;
}
StoryHTMLPreProcess: {
  s/align="right"//gim;
  s/align="center"//gim;
  s/align=right//gim;
  s/align=center//gim;
  s/Â//gim;
  s/—/--/gim;
  s/•/<BR>/gim;
  s/(?:&nbsp;|\xA0)//gim;
  s/advertisement<br>//gim;
  s/<font class="textSmallBold"(.*?)<\/table><\/td><\/tr><\/table>//gis;
  s/href="\/id\//href="http:\/\/www.msnbc.msn.com\/id\//gim;
}
# s/href="\/id\//href="http:\/\/www.msnbc.msn.com\/id\//gim;
# above caters to Sitescooper follownext function cannot work with relative URL; needs absolute URL
usa_today.site:
# usa_today.site
# grabs the handheld version of the USA Today's paper
# Henry Justin <jhenry@fjicl.com>
URL: http://www.usatoday.com/palm/usatoday.htm
Name: USA Today
Levels: 3
ImageURL: http://www.usatoday.com/palm/.*
ContentsCacheable: 0
StoryCacheable: 0
yahoo_business.site:
# Yahoo- Business
# Original by: <spacehog@bsdjournal.com> Patrick Clochesy
# Updated by: "Jonathan Becker" <jonathanlbecker@yahoo.com>
URL: http://pg1.yahoo.com/raw?dp=news&f=/us/news/580/index.xml
Name: Yahoo! Business
Levels: 3
yahoo_entertainment.site:
# Yahoo- Entertainment
# Original by: <spacehog@bsdjournal.com> Patrick Clochesy
# Updated by: "Jonathan Becker" <jonathanlbecker@yahoo.com>
URL: http://pg1.yahoo.com/raw?dp=news&f=/us/news/529/index.xml
Name: Yahoo! Entertainment
Levels: 3
yahoo_politics.site:
# Yahoo- Politics
# Original by: <spacehog@bsdjournal.com> Patrick Clochesy
# Updated by: "Jonathan Becker" <jonathanlbecker@yahoo.com>
URL: http://pg1.yahoo.com/raw?dp=news&f=/us/news/584/index.xml
Name: Yahoo! Politics
Levels: 3
yahoo_tech.site:
# Yahoo- Tech
# Original by: <spacehog@bsdjournal.com> Patrick Clochesy
# Updated by: "Jonathan Becker" <jonathanlbecker@yahoo.com>
URL: http://pg1.yahoo.com/raw?dp=news&f=/us/news/581/index.xml
Name: Yahoo! Tech
Levels: 3
yahoo_top_stories.site:
# Yahoo- Top Stories
# Original by: <spacehog@bsdjournal.com> Patrick Clochesy
# Updated by: "Jonathan Becker" <jonathanlbecker@yahoo.com>
URL: http://wap.oa.yahoo.com/raw?dp=news&f=/us/news/578/index.xml
Name: Yahoo! Top Stories
Levels: 3
blather.site:
# Fortean news from blather.net
URL: http://www.blather.net/current.html
Name: Blather
Levels: 1
StoryStart: <BODY
StoryEnd: feedback and comments
davenet.site:
# Dave Winer's DaveNet
URL: http://davenet.userland.com/
Name: DaveNet
Levels: 1
# this is inconvenient; there's no easy way to find start of text anymore.
StoryStart: Part of the .*? website, syndicated on .*?</b>
StoryEnd: <br>Last update:
StoryDiff: 1
i_cringely.site:
# Sitescooper site definition for the "I, Cringely" column on pbs.org.
URL: http://www.pbs.org/cringely/
Name: I, Cringely
Levels: 2
ContentsStart: div id="fpulp"
ContentsEnd: div id="ftitshop"
StoryURL: http://www.pbs.org/cgi-registry/cringely/.*
StoryCacheable: 0
nro.site:
# National Review Online
#
URL: http://www.nationalreview.com/
Name: National Review Online
Levels: 2
ImageURL: /images/dropcaps/\D\.gif
StoryURL: /\S+/\D+\d+\.asp
# Pattern for the printable form of a story URL (second sample URL below).
# The "?" and "." are escaped so they match literally; unescaped, "asp?" made
# the "p" optional and the pattern could never match "printpage.asp?ref=".
# NOTE(review): the path was also changed from "/\S+/script/..." to
# "/script/..." to agree with the sample URL -- confirm against the live site.
StoryURL: /script/printpage\.asp\?ref=/\S+/\D+\d+\.asp
StoryToPrintableSub: s,^(http://.*\.com)(\D+\d+\.asp)$,$1/script/printpage.asp?ref=$2,
#http://www.nationalreview.com/nr_comment/nr_comment050802.asp
#http://www.nationalreview.com/script/printpage.asp?ref=/nr_comment/nr_comment050802.asp
pulpit.site:
# Sitescooper site definition for Cringely's weekly "The Pulpit" column.
URL: http://cgi.pbs.org/cgi-registry/cringely/thisweek.pl?pulpit
Name: The Pulpit
Levels: 1
StoryStart: <!--========================== Content between these lines ==========================-->
StoryEnd: <!--%%ENDCOLUMN%%-->
# only scoop on fridays
# EvaluatePerl: {
# my(@time);
# my($day);
# @time = localtime();
# $day = $time[6]; # day of week is 6th element of localtime
# $skip_site = ($day =~ /[^5]/); #skip_site=true if day is not friday
# }
roving_reporter.site:
# Sitescooper site definition for the roving_reporter column on TBTF.
URL: http://tbtf.com/roving_reporter/
Name: the roving_reporter
Description: t. byfield's regular column on the TBTF site
Levels: 1
StoryStart: <table border="0" cellpadding="9" cellspacing="0" width="90%" bgcolor="white">
StoryEnd: <b>The r_r began as a semi-collaborative nym
StoryDiff: 1
salon.site:
# Salon.site -- with contributions from Henry Justin <jhenry /at/ fjicl.com>,
# Eric <ethomas /at/ deltanet.com> and Justin <jm /at/ jmason.org>
URL: http://www.salon.com/
Name: Salon Magazine
Levels: 2
ContentsStart: -- *begin feature story *--
ContentsEnd: >Illustration by
ContentsDiff: 1
StoryURL: http://www.salon.com/(books|politics|news|people|tech|mwt|health|ent|media|travel|letters|sex|business)/.*/.*
# skip the middle pages
StorySkipURL: http://www.salon.com/(books|politics|news|people|tech|mwt|health|ent|media|travel|comics|letters|sex|business)/print.html
# and the newswire stories
StorySkipURL: http://www.salon.com/wire/.*
# StoryToPrintableSub: s,^(http://www.salon.com/.*)/(?:index.html|$),\1/print.html,
StoryStart: <p>To print this page, select "Print" from the File menu of your browser</p>
StoryEnd: Sound Off
StoryHeadline: <title>Salon.com \S+ \| (.*?)</title>
MinPages: 2
suck.site:
# Sitescooper site definition for Suck.com (AvantGo edition).
URL: http://www.suck.com/daily/avantgo/
Name: Suck.com
Levels: 1
slate.site:
# Sitescooper site definition for Slate.
URL: http://www.slate.com/?id=85223&date.x=10&date.y=10
Name: Slate
Levels: 2
ContentsDiff: 1
ContentsStart: <form name="form3">
ContentsEnd: </form>
# Remove <input ...> tags from the contents page.
ContentsHTMLPreProcess: s,<input.*?>,,gi
StoryURL: http://(www\.)?slate.(msn\.)?com/id/.+
StoryURL: http://(www\.)?slate.(msn\.)?com/toolbar.+
# Rewrite id/<n>/... story URLs into the toolbar.aspx print action.
StoryToPrintableSub: s,id/(\d+)/.*,toolbar.aspx?action=print&id=$1,
alanmiller.site:
# Sitescooper site definition for alan-miller.org.
URL: http://www.alan-miller.org/
Name: Alan Miller
Levels: 2
StoryURL: http://www.alan-miller.org/.*
ContentsDiff: 1
palm_boulevard.site:
# Sitescooper site definition for Palm Boulevard (AvantGo channel).
URL: http://www.palmblvd.com/channels/avantgo/index.html
Name: Palm Boulevard
Description: The Complete Independent Palm Resource
Levels: 3
ImageURL: http://www.palmblvd.com/channels/avantgo/.*
ContentsURL: http://www.palmblvd.com/channels/avantgo/[^/]+\.html?
StoryURL: http://www.palmblvd.com/channels/avantgo/\S+/.*
IssueCacheable: 0
ContentsCacheable: 0
StoryCacheable: 0
palmpilotsoftware.site:
# URL thanks to http://members.bellatlantic.net/~blumax/plink.html !
#
URL: http://www.zdnet.com/downloads/avantgo/index.html
Name: PalmPilot Software
Description: www.palmpilotsoftware.com
Levels: 1
ImageURL: http://www.zdnet.com/swlib/avantgo/images/.*
palmpower.site:
# Yay! Finally tracked down the PalmPower "for palmpilot delivery" site. These
# are MUCH easier to read than the full javascript, multipage stuff.
#
URL: http://www.palmpower.com/avantgo/
Name: PalmPower
Description: Tips and techniques exclusively for Palm computer users.
Levels: 3
IssueLinksStart: <HTML>
IssueLinksEnd: </HTML>
IssuePrint: 1
ContentsURL: http://.*.(pair\.com|palmpower\.com)/.*\.html
ContentsStart: <HTML>
ContentsEnd: </HTML>
ContentsPrint: 1
StoryURL: http://.*.(pair\.com|palmpower\.com)/.*\.html
StoryStart: <HTML>
StoryEnd: </HTML>
palmstation.site:
# PalmStation.Com, driven by the site's RSS feed.
# (This is a sitescooper site file, see http://sitescooper.tsx.org/ .
# It was generated from the site's RSS by rss-to-site.pl 1.0.)
URL: http://www.palmstation.com/palmstation.rdf
Name: PalmStation.Com
Description: News, Views, Reviews... To Feed The Need
ContentsFormat: rss
StoryURL: /view_article\.asp.*
StoryStart: <td bgcolor=#550010 colspan=2>
StoryEnd: Comment on this
visorcentral_discussion.site:
# VisorCentral discussion forum: the "getdaily" search page is the
# start page and showthread.php pages are the stories.
URL: http://discussion.visorcentral.com/vcforum/search.php?action=getdaily
Name: VisorCentral Discussion
Description: the site to visit for Handspring Visor owners
AuthorName: Yoon Fui Thean
AuthorEmail: yoonfui /at/ bigfoot.com
Levels: 2
ContentsURL: /vcforum/search.php.*
StoryURL: http://discussion.visorcentral.com/vcforum/showthread.php\?s=[0-9a-z]+\&threadid=\d+
StoryUseTableSmarts: 0
StoryStart: <!-- end marcus header -->
StoryEnd: <!-- /time zone and post buttons -->
# remove the line below if you only want new threads, and not old
# threads with new replies
StoryCacheable: 0
# maximum size of the output file
SizeLimit: 5000
visorcentral_mobile.site:
# VisorCentral Mobile Edition: news and review pages under /mobile/.
URL: http://www.visorcentral.com/mobile/
Name: VisorCentral Mobile
Description: VisorCentral Mobile Edition
AuthorName: Yoon Fui Thean
AuthorEmail: yoonfui /at/ bigfoot.com
Levels: 2
ContentsURL: /mobile/.*
ContentsDiff: 0
# limit= may have more than one digit; use \d+ for news pages exactly
# as the review pattern already does
StoryURL: /mobile/news\.php\?limit=\d+
StoryURL: /mobile/review\.php\?limit=\d+
StoryStart: <a name='thetop'></a>
StoryEnd: <div align='left'>
# the line below gets the full text every time, even if you had seen
# the same story already
StoryCacheable: 0
la_lettre_edition_mobile.site:
# La Lettre de l'Internet, palm-pilot edition.
# This site was converted from an AvantGo .subs file by subs-to-site.pl.
# See http://sitescooper.org/ for more information on sitescooper.
URL: http://www.lalettre.com/palm-pilot/news/1.html
Name: La Lettre de l'Internet
Levels: 2
ContentsPrint: 1
ImageURL: http://.*
motley_fool.site:
# The Motley Fool news, AvantGo partner page, one level.
# This site was converted from an AvantGo .subs file by subs-to-site.pl.
# See http://sitescooper.org/ for more information on sitescooper.
URL: http://www.fool.com/partners/avantgo/index.htm
Name: The Motley Fool - News
Levels: 1
ImageURL: http://.*
the_guardian_palmsized.site:
# The UK Guardian, Palmsized
# Site file for Sitescooper (http://jmason.org/software/sitescooper/)
# Last updated: 13 Sep 2001 stewart@ref.collins.co.uk
#   s/guardianunlimited/guardian/;
URL: http://www.guardian.co.uk/avantgo/
Name: UK Guardian
Levels: 3
# Make sure pages with "avantgostory" are treated as "stories" by
# sitescooper, so that we don't wind up with 'no new stories, ignoring'.
ContentsURL: http://www.guardian.co.uk/(avantgostories|avantgo/).*\.html
StoryURL: http://www.guardian.co.uk/avantgostory/.*\.html
ImageURL: http://www.pixunlimited.co.uk/.*
ContentsSkipURL: http://www.guardian.co.uk/avantgo/advertpage/.*
the_onion_pda.site:
# the_onion_pda.site -- The Onion, PDA edition.
# * 2000-04-19, Andrew Chadwick: corrected depth, removed shell
#   metachars from Name, added advert and avantgo markup removal.
# Both pre-process blocks turn double <br> into <p> and delete links to
# adbot.theonion.com; the story block also rewrites pods://avantgo/back
# links to http://mobile.theonion.com/.
URL: http://mobile.theonion.com/
Name: The Onion
Description: The Onion (PDA Edition).
Levels: 2
ImageURL: .*
ContentsHTMLPreProcess: {
  s!<br/?>\s*<br/?>!<p>!gsi;
  s!<A\s+HREF=.http://adbot.theonion.com[^>]+>.*?</A>!!gsix;
}
StoryHTMLPreProcess: {
  s!<br/?>\s*<br/?>!<p>!gsi;
  s!(<A\s+HREF=.?) pods://avantgo/back/? ([^>]*>.*?</A>) !$1http://mobile.theonion.com/$2!xsig;
  s!<A\s+HREF=.http://adbot.theonion.com[^>]+>.*?</A>!!gsix;
}
the_register_rss.site:
# The Register headlines via RSS. Every link is accepted as a story and
# go.theregister.com/feed/ links are rewritten to the print.html page
# on www.theregister.co.uk.
URL: http://www.theregister.co.uk/headlines.rss
Name: The Register RSS
Levels: 2
ContentsFormat: rss
StoryURL: .*
StoryToPrintableSub: s,^(http://go.theregister.com/feed/)(.*),http://www.theregister.co.uk/\2print.html,
inq7-mobile.site:
# INQ7 mobile edition, scooped three levels deep.
URL: http://news.inq7.net/mpda/html_output/
Name: INQ7 mobile
Description: The Philippine Daily Inquirer and GMA Network News Web site for Mobile Phones and PDA's
AuthorName: Barry Dexter A. Gonzaga
AuthorEMail: barryg-sitescooper /at/ kssp.upd.edu.ph
Levels: 3
ContentsURL: /mpda/html_output/.*
StoryURL: /mpda/html_output/.*
StoryURL: http://money\.inq7\.net/topstories/printable_topstories\.php.*
ImageURL: /mpda/html_output/.*\.gif
ImageURL: http://www\.inq7money\.net/images/header/.*\.gif
movietickets.site:
# movietickets.com showtimes site file.
# Edit the SearchZip= value in the URL: line to use your own zip code.
URL: http://www.movietickets.com/house_list.asp?house_id=0&lng=0&movie_id=0&SearchZip=27560&SearchCity=&SearchState=&SearchSort=0&ShowDate=0&SearchRadius=15&image1.x=0&image1.y=0
Name: Movie Showtimes
Levels: 2
ContentsStart: Select a theater to display showtimes.
ContentsEnd: <a href="house_search.asp">
StoryStart: <td class="TheaterName">
StoryEnd: Look on another date?
ImageURL: http://a1608.g.akamai.net/7/1608/1174/200114114050/www.movietickets.com/images_real/no_passes2.gif
my_yahoo.site:
#
# My Yahoo! personal page, fetched through the Yahoo login URL.
# You will need to set the MY_YAHOO_NAME and MY_YAHOO_PASSWORD
# environment variables before using this site.
#
# NOTE(review): only MY_YAHOO_NAME is declared via RequireEnvVariable
# although the URL also expands MY_YAHOO_PASSWORD -- confirm whether a
# second RequireEnvVariable line is supported and wanted.
URL: http://login.yahoo.com/config/login?.src=my&.tries=1&.done=http://my.yahoo.com/&login=${MY_YAHOO_NAME}&passwd=${MY_YAHOO_PASSWORD}
RequireEnvVariable: MY_YAHOO_NAME
Name: My Yahoo
Description: My Yahoo!
Levels: 2
StoryURL: http://.*.yahoo.com/.*
StorySkipURL: .*external.*
StorySkipURL: http://(dir|edit).yahoo.com/.*
StorySkipURL: http://.*search.*.yahoo.com/.*
StoryFollowLinks: 0 # don't follow "More..." links
ContentsUseTableSmarts: 0
StoryUseTableSmarts: 0
TableRender: flatten
sydney_morning_herald.site:
# The Sydney Morning Herald, text edition.
AuthorName: Yvonne Smith <yvonne@thewatch.net>
URL: http://www.smh.com.au/text/
Name: Sydney Morning Herald
Description: The Sydney Morning Herald
SizeLimit: 500
Levels: 2
StoryURL: http://www.smh.com.au/news/.*
# a story runs from its <H1> headline to the closing body tag
StoryStart: <H1>
# was "</BOD>", a truncated tag that cannot match "</BODY>"
StoryEnd: </BODY>
StoryHeadline: <H1> (.*)</H1>
yourmovies_canberra.site:
### YourMovies session times for Canberra, Australia.
### Could be adjusted for other Australian cities.
URL: http://www.yourmovies.com.au/handheld/session_times.cfm?&venue_id=21604,224849,93546,239722,224927,253443,90268&sort=movie
Name: YourMovies Canberra
Levels: 2
bostonglobe.site:
# Boston Globe, City & Region section.
URL: http://www.boston.com/news/globe/city_region/
Name: Boston_Globe
Description: Boston Globe City & Region
AuthorName: Bruce Zohn
AuthorEmail: coffeecat@bigfoot.com
Levels: 2
ContentsURL: /news/globe/city_region/.*
ContentsStart: <div class="mainContent">
ContentsEnd: <div class="footerLinks">
StoryURL: /news/local/.*
# the ?mode=PF and ?pg=full variants of story URLs are skipped
StorySkipURL: /news/local/.*\?mode=PF
StorySkipURL: /news/local/.*\?pg=full
StoryStart: <div class="story">
StoryEnd: <div class="toolsMain">
StoryFollowLinks: 4
ImageURL: http://cache.boston.com/bonzai-fba/.*
la_times_frontpage.site:
# The Los Angeles Times Front Page
URL: http://www.latimes.com/news/
Name: LA Times Front Page
Levels: 2
ContentsStart: <!-- TIMESTAMP -->
ContentsEnd: <!-- END RIGHT MAIN -->
StoryURL: /news/front/.*\.html
StoryStart: Print this story
StoryEnd: <!--STORY ENDS-->
bayarea_com_news.site:
# BayArea.com news. The contents pre-process inserts an
# "SF Bay Area News" heading in front of the "Last updated" marker.
URL: http://www.bayarea.com/news/
Name: BayArea.com News
Description: local news for the SF Bay Area from BayArea.com
AuthorName: Bill Janssen
AuthorEmail: bill@janssen.org
Levels: 2
StoryURL: http://www.bayarea.com/.*
ContentsCachable: 0
ContentsStart: <!--Date-->
ContentsEnd: <CENTER>
StoryStart: <SNML_HEADLINES>
StoryEnd: </SNML_BODY>
ContentsHTMLPreProcess: {
  s,(<I><B>Last updated</B>),<H1>SF Bay Area News</H1><br><i>(from http://www.bayarea.com/news/)</i><br>$1,s;
}
sf_chronicle_food.site:
# San Francisco Chronicle Food section. The contents page gets a
# heading prepended; story post-processing strips WIDTH= and NOWRAP
# attributes from TABLE/TD tags.
URL: http://www.sfgate.com/cgi-bin/article-list.cgi?key=FD&directory=Food
Name: SF Chronicle Food
Description: San Francisco Chronicle Food section (published Wednesdays)
AuthorName: Bill Janssen
AuthorEmail: bill@janssen.org
Levels: 2
StoryURL: http://www.sfgate.com/cgi-bin/article.cgi.*
ImageURL: .*/templates/brands/chronicle/images/chronicle\.gif
ContentsCachable: 0
ContentsStart: <!--END HEADLINE MODULE-->
ContentsEnd: <!-- \*\*\*\*\* END OF BIG TABLE \*\*\*\*\* -->
StoryStart: <!-- BEGIN HEADLINE NESTED TABLE -->
StoryEnd: <!-- \*\*\*\*\* END OF BIG TABLE \*\*\*\*\* -->
ContentsHTMLPreProcess: {
  s,^,<H1>SF Chronicle Food Section</H1>,s;
}
StoryPostProcess: {
  s/<TABLE WIDTH=[0-9]*/<TABLE/gm;
  s/<TD([^>]*)WIDTH=[0-9]*/<TD$1/gm;
  s/<TD([^>]*)NOWRAP/<TD$1/gm;
}
sfgate_com_news.site:
# SFGate.com news. The contents page gets a heading prepended; story
# post-processing strips WIDTH= and NOWRAP attributes from TABLE/TD tags.
URL: http://www.sfgate.com/news/
Name: SFGate.com News
Description: SF Bay Area news stories from sfgate.com (Chronicle/Examiner/KRON)
AuthorName: Bill Janssen
AuthorEmail: bill@janssen.org
Levels: 2
StoryURL: http://www.sfgate.com/cgi-bin/article.cgi.*
StoryURL: http://www.sfgate.com/news/baycitynews/.*
ContentsCachable: 0
ContentsStart: <!-- \*\*\*\*\*\*\*\* BEGIN STEW \*\*\*\*\*\*\*\*\*\*\* -->
ContentsEnd: <!-- END STORIES SCRIPT \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -->
StoryStart: <!-- BEGIN HEADLINE NESTED TABLE -->
StoryEnd: <!-- \*\*\*\*\* END OF BIG TABLE \*\*\*\*\* -->
ContentsHTMLPreProcess: {
  s/^/<H1>News from SFGate\.com<\/H1><br><i>(from SF Chronicle, SF Examiner, KRON, AP, SF Gate sources)<\/i><br>/gs;
}
StoryPostProcess: {
  s/<TABLE WIDTH=[0-9]*/<TABLE/gm;
  s/<TD([^>]*)WIDTH=[0-9]*/<TD$1/gm;
  s/<TD([^>]*)NOWRAP/<TD$1/gm;
}
chicago_tribune_business.site:
# Chicago Tribune, Business section (print edition).
# Site file for Sitescooper (http://sitescooper.org)
# Written by: David Czerwinski <david_czerwinski@yahoo.com>, 12-23-00
URL: http://www.chicagotribune.com/business/printedition/
Name: Trib Business
Description: Chicago Tribune Business Section
Levels: 2
StoryURL: /business/printedition/article/.*\.html
# a story ends at the "E-mail this story to a friend" list item
StoryEnd: <LI> <A HREF=.*>E-mail this story to a friend</A>
chicago_tribune_front_page.site:
# Chicago Tribune, Front Page section (print edition).
# Site file for Sitescooper (http://sitescooper.org)
# Written by: David Czerwinski <david_czerwinski@yahoo.com>, 12-23-00
URL: http://www.chicagotribune.com/news/printedition/
Name: Trib Front Page
Description: Chicago Tribune Front Page Section
Levels: 2
StoryURL: /news/printedition/article/.*\.html
# a story ends at the "E-mail this story to a friend" list item
StoryEnd: <LI> <A HREF=.*>E-mail this story to a friend</A>
chicago_tribune_sports.site:
# Chicago Tribune, Sports section (print edition).
# Site file for Sitescooper (http://sitescooper.org)
# Written by: David Czerwinski <david_czerwinski@yahoo.com>, 12-23-00
URL: http://www.chicagotribune.com/sports/printedition/
Name: Trib Sports
Description: Chicago Tribune Sports Section
Levels: 2
StoryURL: /sports/printedition/article/.*\.html
# a story ends at the "E-mail this story to a friend" list item
StoryEnd: <LI> <A HREF=.*>E-mail this story to a friend</A>
Vecernji.site:
# Vecernji List
# - comment out ContentsSkipURL: lines for links you don't want scooped
# NOTE(review): this file spells IssueCacheable with an "e" but
# ContentsCachable / StoryCachable without -- confirm both spellings
# are accepted by the parser.
URL: http://www.vecernji-list.hr
AuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr>
Name: Vecernji List
Levels: 3
ImageURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Media/(RTV-HRT1|RTV-HRT2|RTV-HRT3|RTV-OTV|RTV-NOVA|RTV-NET|RTV-EURO|RTV-DSF|)\.(gif|JPG)

IssueLinksStart: <td width="1\d\d" align="left" valign="top">
IssueLinksEnd: <a href="Pages/PROGNOZA.html">
IssueUseTableSmarts: 0
IssueCacheable: 0

ContentsURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/.*\.html
# ContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/HRV-NAJ.html
# ContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/SVI-NAJ.html
# ContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/ZAG-NAJ.html
# ContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/TIS-NAJ.html
# ContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/PLUS-NAJ.html
ContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/KUL-NAJ.html
# they seem to change sport's page URL every now and then...
ContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/SPO-NAJ.html
ContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/SPORT-NAJAVA.html
# ContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/CRN-NAJ.html
ContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/HORO-NAJ.html
ContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/ODV-NAJ.html
ContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/TRECA-NAJ.html
ContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/MOZ-NAJ.html
ContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/STIL-NAJ.html
ContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/DJE-NAJ.html
# ContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/RTV-PROGRAM.html
# ContentsSkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/ZAN-NAJ.html
ContentsStart: <td width="4\d\d" valign="top" bgcolor="
ContentsEnd: </html>
ContentsUseTableSmarts: 0
ContentsCachable: 0

StoryURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/.*\.html
StorySkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/formular.*\.html
StorySkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/Kronologija.html
StorySkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/Kronologija-slika.html
StorySkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/CRKVA.html
StorySkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/ZEMLJE-POTPIS.html
StorySkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/CRNA-BROJEVI.html
StorySkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/CRNA-ZELJEZNICE.html
StorySkipURL: http://www.vecernji-list.hr/\d\d\d\d/\d\d/\d\d/Pages/CRNA-SAMOUBOJSTVA.html
StoryStart: <td width="4\d\d" valign="top" bgcolor="
StoryEnd: <ul>
StoryUseTableSmarts: 0
StoryCachable: 0
TableRender: keep
accuweather_zagreb.site:
# Accuweather - Zagreb
# To use another city, go to the Accuweather site, find your city and
# copy its URL into the URL: line below.
URL: http://www.accuweather.com/adcbin/intlocal_index?wxcity2=ZAGREB&wxcountry=EU;RT
AuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr>
Name: Accuweather - Zagreb
Levels: 1
# other details are picked up automatically from ../lib/layouts.site
berlingsketidende.site:
# Berlingske Tidende
# Author: Jarl Friis <jarl@diku.dk>
# Two story variants are provided below; only ONE set of
# StoryStart/StoryEnd lines should be active at a time.
URL: http://www.berlingske.dk
Name: Berlingske
Levels: 2
AuthorName: Jarl Friis
AuthorEmail: jarl@diku.dk
# NOTE(review): "1" is not a URL pattern like the second ImageURL
# below; it looks like a stray boolean -- confirm before removing.
ImageURL: 1
ContentsIncludeStartPattern: 1
ContentsIncludeEndPattern: 0
ContentsUseTableSmarts: 0
ContentsStart: <!-- Template c_forside_hovedhistorie begin -->
ContentsEnd: <!-- Template c_forside_hovedhistorie end -->
ImageURL: http://www.berlingske.dk/grafik/redaktion/.*
####
#### Use this if you want simple story versions:
#### (the whole variant is disabled; its StoryStart/StoryEnd used to be
#### left active and duplicated the image/table pair below)
####
#StoryToPrintableSub: s,(http://www.berlingske.dk/artikel:aid)(=[0-9]*),http://www.berlingske.dk/popup:print\2,i;
#StoryURL: http://www.berlingske.dk/popup:print=[0-9]*
#StoryStart: <hr size=1 width=100%>
#StoryEnd: <!-- WebMeasure Starts -->
####
#### Use this if you want image/table versions:
####
StoryStart: <!-- Template c_artikel begin -->
StoryEnd: <!-- Template c_artikel end -->
StoryIncludeStartPattern: 0
StoryIncludeEndPattern: 0
TableRender: keep
#StoryLifetime: 0
ContentsCacheable: 1
StoryCacheable: 1
ContentsDiff: 1
StoryDiff: 1
computerworld.dk.site:
# Computerworld.dk online
# Author: Jarl Friis <jarl@diku.dk>
# Story links are rewritten to the "til_udskrift" variant of the
# article page before being fetched.
URL: http://www.computerworld.dk
Name: Computerworld DK
Levels: 2
AuthorName: Jarl Friis
AuthorEmail: jarl@diku.dk
ContentsIncludeStartPattern: 1
ContentsIncludeEndPattern: 0
ContentsUseTableSmarts: 0
ContentsStart: <table width="490" border="0" cellspacing="0" cellpadding="0" VALIGN=TOP>
#for few news:
ContentsEnd: <TABLE CELLSPACING=0 CELLPADDING=0 WIDTH=510 >
#for categorised news:
#ContentsEnd: <!-- INDHOLD -- BREDDE = 490 PIXELS -->
StoryToPrintableSub: s,(Vis_artikel)(.asp.ArticleID=[0-9]*),\1_til_udskrift\2,i;
StoryURL: http://www.computerworld.dk/[vV]is_artikel_til_udskrift.asp.ArticleID=[0-9]*
ImageURL: http://www.computerworld.dk/[Ii]mages/[^/]*[.].{3}
StoryIncludeStartPattern: 0
StoryIncludeEndPattern: 0
StoryStart: <img src="Images/SiteImages/stort_computerworldlogo.gif" width=442 height=86 alt="" border="0">
# This end marker does not even exist on the page, but setting it
# avoids the standard sitescooper "layouts"
StoryEnd: </HTML>
TableRender: keep
#StoryLifetime: 0
ContentsCacheable: 1
StoryCacheable: 1
ContentsDiff: 1
StoryDiff: 1
dmi-vejret.site:
# Weather forecast from DMI (Danish Meteorological Institute)
# Author: Jarl Friis <jarl@diku.dk>
# Change this to your local weather:
URL: http://www.dmi.dk/vejr/regionaludsigter/kbhnsj.txt.html
Name: DMIs vejrudsigt
Levels: 1
AuthorName: Jarl Friis
AuthorEmail: jarl@diku.dk
# Danish 7-days forecast:
AddURL: http://www.dmi.dk/vejr/7dgnland/7dgnland.txt.html
# Was "ContentsIncludeEndPattern"; this file only defines StoryStart /
# StoryEnd, so the end-pattern switch is given for the story as well,
# matching the StoryIncludeStartPattern line.
StoryIncludeStartPattern: 0
StoryIncludeEndPattern: 0
StoryStart: <img src="/gifs/dmi-logo2.gif" width=40 height=88>
StoryEnd: </BODY>
#StoryUseTableSmarts: 0
#TableRender: keep
#StoryLifetime: 0
ContentsCacheable: 0
StoryCacheable: 0
ContentsDiff: 0
StoryDiff: 0
geekculture.site:
# http://www.geekculture.dk/
# Author: Jan Lund Thomsen <kwed@kwed.org>
URL: http://www.geekculture.dk/
Name: GeekCulture.dk
Levels: 2
AuthorName: Jan Lund Thomsen
AuthorEmail: kwed@kwed.org
ContentsStart: section=11
# "Aeldre artikler" (older articles): the first letter is a non-ASCII
# ligature that had been mis-encoded, so it is matched with "."
ContentsEnd: .ldre artikler
StoryURL: http://www.geekculture.dk/arkiv.php3\?reviewid=.*
StoryStart: alt="Tilbage til Hovedsiden"
# The "?" must be escaped to match literally, and the review id differs
# per story, so it is matched with \d+ instead of the fixed 877.
StoryEnd: <form method=post action="sendtilven\.php3\?reviewid=\d+"><font size="-1">
ingeniřren.site:
# Ingeniøren
# Author: Jarl Friis <jarl@diku.dk>
URL: http://www.ing.dk
Name: Ingeniřren
Levels: 2
AuthorName: Jarl Friis
AuthorEmail: jarl@diku.dk
ContentsIncludeStartPattern: 1
ContentsIncludeEndPattern: 1
ContentsStart: <!-- Indholde Start -->
# This end marker will NOT include the ShortNews:
ContentsEnd: <TR><TD COLSPAN="2"><IMG SRC="/ress/ramme/d.gif" WIDTH="2" HEIGHT="3" ALT=""></TD></TR></TABLE>
# These would include ShortNews
#ContentsEnd: </TD></TR></TABLE> <BR>
#ContentsEnd: </HTML>
# ...but seem not to work: ShortNews has another layout.
StoryStart: <!-- .BeginEditable "trumpet" -->
StoryEnd: <!-- .BeginEditable "hojre_spalte_nede_bund" -->
ImageURL: http://www.ing.dk/arkiv/.*
StorySkipURL: mailto:.*
StoryUseTableSmarts: 0
TableRender: flatten
ContentsCacheable: 1
StoryCacheable: 1
ContentsDiff: 1
StoryDiff: 1
Active: 1
politiken_daily_summary.site:
# site_samples/regional_denmark/politiken_daily_summary.site
#
# Daily news summary from Danish newspaper 'Politiken'.
# The story pre-process turns <BR><BR> and </CENTER><BR> into
# paragraph breaks.
AuthorName: Jan Lund Thomsen
AuthorEmail: kwed@kwed.org
URL: http://politiken.dk/VisArtikel.iasp?TemplateID=2377
Name: Politiken: summary
Levels: 2
ImageURL: http://politiken.dk/grafik/avantgo/politikendk_logo.gif
ImageScaleToMaxWidth: 150
StoryHTMLPreProcess: {
  s/<BR><BR>/<p>/g;
  s/<\/CENTER><BR>/<\/CENTER><P>/g;
}
sslug-kalender.site:
# Skåne Sjællands Linux Brugergruppe (SSLUG) calendar
# Author: Jarl Friis <jarl@diku.dk>
# NOTE(review): Levels is 1 and only StoryStart/StoryEnd are defined,
# yet the include-pattern switches are the Contents* ones -- confirm
# whether StoryInclude*Pattern was intended.
URL: http://www.sslug.dk/adict/mgroup.php?organizer=SSLUG
Name: SSLUG kalender
Levels: 1
AuthorName: Jarl Friis
AuthorEmail: jarl@diku.dk
ContentsIncludeStartPattern: 0
ContentsIncludeEndPattern: 0
StoryStart: <!-- End of LUG table -->
StoryEnd: <table width="100%" cellspacing="0" cellpadding="0" border="0">
Active: 1
LeMonde1_INT_FRA_STE_REG.site:
# Le Monde interactif - all new articles from the sections
# International, France, Société, Régions
#___________________________________________
# You may delete from the list below any URL you do not read.
# Site extraction settings for Sitescooper
# (http://jmason.org/software/sitescooper/)
# Updates published at http://www.multimania.com/avm/palm.html
# 10.11.99, by Jacques Turbé
URL: http://www.lemonde.fr/sequence/0,2319,2037,00.html # International
AddURL: http://www.lemonde.fr/sequence/0,2319,2030,00.html # France
AddURL: http://www.lemonde.fr/sequence/0,2319,2079,00.html # France Société
AddURL: http://www.lemonde.fr/sequence/0,2319,2075,00.html # Régions
Name: Le Monde International France Société Régions
Levels: 2
ContentsStart: <td width=510 valign="top">
ContentsEnd: </body>
ContentsCacheable: 0
StoryStart: <td width=400>
StoryEnd: article_impression
StoryURL: http://www.lemonde.fr/article/.*(QUO|MIA).*\.html
LeMonde2_HORIZONS.site:
# Le Monde interactif - all new articles from the sections
# Horizons: Débats, Enquêtes, Editoriaux
#___________________________________________
# You may delete from the list below any URL you do not read.
# Site extraction settings for Sitescooper
# (http://jmason.org/software/sitescooper/)
# Updates published at http://www.multimania.com/avm/palm.html
# 10.11.99, by Jacques Turbé
URL: http://www.lemonde.fr/sequence/0,2319,2044,00.html # Horizons
AddURL: http://www.lemonde.fr/sequence/0,2319,2070,00.html # Horizons Débats
AddURL: http://www.lemonde.fr/sequence/0,2319,2065,00.html # Horizons Enquêtes
AddURL: http://www.lemonde.fr/sequence/0,2319,2064,00.html # Horizons Editoriaux
Name: Le Monde HORIZONS
Levels: 2
ContentsStart: <td width=510 valign="top">
ContentsEnd: <td width=125 valign="top">
ContentsCacheable: 0
StoryStart: <td width=400>
StoryEnd: article_impression
StoryURL: http://www.lemonde.fr/article/.*(QUO|MIA).*\.html
LeMonde3_ENT_COM_PLA_ECO.site:
# Le Monde interactif - all new articles from the sections
# Entreprise, Communication, Placements, Le Monde de l'économie
#___________________________________________
# You may delete from the list below any URL you do not read.
# Site extraction settings for Sitescooper
# (http://jmason.org/software/sitescooper/)
# Updates published at http://www.multimania.com/avm/palm.html
# 10.11.99, by Jacques Turbé
URL: http://www.lemonde.fr/sequence/0,2319,2058,00.html # Entreprises
AddURL: http://www.lemonde.fr/sequence/0,2319,2061,00.html # Entreprises Communication
AddURL: http://www.lemonde.fr/sequence/0,2319,2074,00.html # Placements
AddURL: http://www.lemonde.fr/sequence/0,2319,2073,00.html # Le Monde de l'économie
Name: Le Monde Entreprise Communication Placements Economie
Levels: 2
ContentsStart: <td width=510 valign="top">
ContentsEnd: <td width=125 valign="top">
ContentsCacheable: 0
StoryStart: <td width=400>
StoryEnd: article_impression
StoryURL: http://www.lemonde.fr/article/.*(QUO|MIA).*\.html
LeMonde4_AUJ_SCI_SPO_CULT.site:
# Le Monde interactif - all new articles from the sections
# Aujourd'hui, Sciences, Sports, Culture, Théâtre/danse, Festivals,
# Photographie, Peinture/arts plastiques, Cinéma
#___________________________________________
# You may delete from the list below any URL you do not read.
# Site extraction settings for Sitescooper
# (http://jmason.org/software/sitescooper/)
# Updates published at http://www.multimania.com/avm/palm.html
# 10.11.99, by Jacques Turbé
URL: http://www.lemonde.fr/sequence/0,2319,2040,00.html # Aujourd'hui
AddURL: http://www.lemonde.fr/sequence/0,2319,2077,00.html # Sciences
AddURL: http://www.lemonde.fr/sequence/0,2319,2045,00.html # Sports
AddURL: http://www.lemonde.fr/sequence/0,2319,2033,00.html # Culture
AddURL: http://www.lemonde.fr/sequence/0,2319,2083,00.html # Théâtre, danse
AddURL: http://www.lemonde.fr/sequence/0,2319,2192,00.html # Festivals
AddURL: http://www.lemonde.fr/sequence/0,2319,2219,00.html # Photographie
AddURL: http://www.lemonde.fr/sequence/0,2319,2220,00.html # Peinture, arts plastiques
AddURL: http://www.lemonde.fr/service_cinema/0,2331,109-QUO,00.html # Cinéma
Name: Le Monde Aujourd'hui Sciences Sports Culture
Levels: 2
ContentsStart: <td width=510 valign="top">
ContentsEnd: <td width=125 valign="top">
ContentsCacheable: 0
StoryStart: <td width=400>
StoryEnd: article_impression
StoryURL: http://www.lemonde.fr/article/.*(QUO|MIA).*\.html
LeMonde5_LIVRES.site:
# Le Monde interactif - all new articles from the book pages
# Le Monde des livres: Littératures, Feuilleton, Essais, Actualités,
# Chronique, Poches, Jeunesse
#___________________________________________
# You may delete from the list below any URL you do not read.
# Site extraction settings for Sitescooper
# (http://jmason.org/software/sitescooper/)
# Updates published at http://www.multimania.com/avm/palm.html
# 10.11.99, by Jacques Turbé
URL: http://www.lemonde.fr/service_livres/0,2333,116-QUO,00.html # Le Monde des livres
AddURL: http://www.lemonde.fr/service/0,2321,160-QUO,00.html # Littératures
AddURL: http://www.lemonde.fr/service/0,2321,156-QUO,00.html # Feuilleton
AddURL: http://www.lemonde.fr/service/0,2321,138-QUO,00.html # Essais
AddURL: http://www.lemonde.fr/service/0,2321,159-QUO,00.html # Actualités
AddURL: http://www.lemonde.fr/service/0,2321,155-QUO,00.html # Chronique
AddURL: http://www.lemonde.fr/service/0,2321,117-QUO,00.html # Poches
AddURL: http://www.lemonde.fr/service/0,2321,158-QUO,00.html # Jeunesse
Name: Le Monde des Livres
Levels: 2
ContentsStart: <td width=510 valign="top">
ContentsEnd: <td width=125 valign="top">
ContentsCacheable: 0
StoryStart: <td width=400>
StoryEnd: article_impression
StoryURL: http://www.lemonde.fr/article/.*(QUO|MIA).*\.html
LeMonde6_Interactif.site:
# Le Monde interactif - all new articles from the sections
# LMI: Actus, Futurs, Branché, Techno, Business, Services, Conso,
# Enquêtes
#___________________________________________
# You may delete from the list below any URL you do not read.
# Site extraction settings for Sitescooper
# (http://jmason.org/software/sitescooper/)
# Updates published at http://www.multimania.com/avm/palm.html
# 10.11.99, by Jacques Turbé
URL: http://www.lemonde.fr/aietek/0,2327,2039,00.html # LMI Actus
AddURL: http://www.lemonde.fr/aietek/0,2327,2043,00.html # LMI Futurs
AddURL: http://www.lemonde.fr/aietek/0,2327,2059,00.html # LMI Branché
AddURL: http://www.lemonde.fr/aietek/0,2327,2081,00.html # LMI Techno
AddURL: http://www.lemonde.fr/aietek/0,2327,2060,00.html # LMI Business
AddURL: http://www.lemonde.fr/aietek/0,2327,2078,00.html # LMI Services
AddURL: http://www.lemonde.fr/aietek/0,2327,2062,00.html # LMI Conso
AddURL: http://www.lemonde.fr/aietek/0,2327,2066,00.html # LMI Enquêtes
Name: Le Monde Interactif
Levels: 2
ContentsStart: <td width=510 valign="top">
ContentsEnd: </body>
ContentsCacheable: 0
StoryStart: <td width=400>
StoryEnd: article_impression
StoryURL: http://www.lemonde.fr/article/.*\.html
LeMonde7_UNE.site:
# Le Monde interactif - all new articles from the sections
# La Une: Accueil, Pierre Georges, Liens
#___________________________________________
# You may delete from the list below any URL you do not read.
# Site extraction settings for Sitescooper
# (http://jmason.org/software/sitescooper/)
# Updates published at http://www.multimania.com/avm/palm.html
# 10.11.99, by Jacques Turbé
URL: http://www.lemonde.fr/sequence/0,2319,2031,00.html # Accueil
AddURL: http://www.lemonde.fr/sequence/0,2319,2199,00.html # Pierre Georges
AddURL: http://www.lemonde.fr/article/0,2320,19245,00.html # journaux en ligne
AddURL: http://www.lemonde.fr/article/0,2320,19277,00.html # signets technologiques
Name: Le Monde Accueil Pierre Georges Liens
Levels: 2
ContentsStart: <td width=510 valign="top">
ContentsEnd: <td width=125 valign="top">
ContentsCacheable: 0
StoryStart: <td width=400>
StoryEnd: article_impression
StoryURL: http://www.lemonde.fr/article/.*(QUO|MIA).*\.html
LeMonde_AutoMoto.site:
# Le Monde interactif - all new articles from the section
# Auto Moto
#___________________________________________
# You may delete from the list below any URL you do not read.
# Site extraction settings for Sitescooper
# (http://jmason.org/software/sitescooper/)
# Updates published at http://www.multimania.com/avm/palm.html
# 10.11.99, by Jacques Turbé
URL: http://www.lemonde.fr/sequence/0,2319,2162,00.html #Auto Moto
Name: Le Monde - AutoMoto
Levels: 2
ContentsStart: <td width=510 valign="top">
ContentsEnd: <td width=115 valign="top">
ContentsCacheable: 0
StoryURL: http://www.lemonde.fr/article/.*(QUO|MIA).*\.html
journaldunet.site:
# Le Journal du Net - today's articles
# Site extraction settings for Sitescooper
# (http://jmason.org/software/sitescooper/)
# by: Jacques Turbé http://avm.online.fr
# Created: 1999-10-20
# Updated: 2000-02-01, Philippe Renard: interviews integrated
# NOTE(review): "ContentsCachable" and "StoryCacheable" are spelled
# differently here -- confirm both spellings are accepted.
URL: http://www.journaldunet.com/defaut.shtml
Name: Le_Journal_du_Net
Levels: 2
ContentsStart: <!-- Fin du menu dynamique
ContentsEnd: Pour tout probl
ContentsCachable: 0
StoryURL: http://www.journaldunet.com/\d+/.+\.shtml
StoryURL: http://www.journaldunet.com/it_.+\.shtml
StoryStart: orps du site
StoryEnd: Pour tout probl
StoryCacheable: 1
journaldunet_dossiers.site:
# Le Journal du Net - the "NET 20" dossier
# Site extraction settings for Sitescooper
# (http://jmason.org/software/sitescooper/)
# by: Jacques Turbé http://www.multimania.com/avm/palm.html
# Updated: 1999-10-19
URL: http://www.journaldunet.com/dossiers/net20/sommaire_net20.shtml
Name: Net 20
Levels: 2
ContentsStart: <!-- Début Corps du site -->
ContentsEnd: Pour tout probl
ContentsCachable: 0
StoryURL: http://www.journaldunet.com/dossiers/net20/20.*\.shtml
StoryStart: <!-- Début Corps du site -->
StoryEnd: Pour tout probl
StoryCacheable: 1
la_tribune.site:
# La Tribune, electronic edition
# Site file for Sitescooper (http://jmason.org/software/sitescooper/)
# by: "P.Y. Letournel" <e-py.letournel /at/ wanadoo.fr>
# Take care: the first run creates a prc file of around 750K
URL: http://www.latribune.fr/Tribune/Articles.nsf/Articles?OpenView&Count=99&Expand=2#2 # Entreprises
AddURL: http://www.latribune.fr/Tribune/Articles.nsf/Articles?OpenView&Count=99&Expand=3#3 # Europe
AddURL: http://www.latribune.fr/Tribune/Articles.nsf/Articles?OpenView&Count=99&Expand=4#4 # Finance
AddURL: http://www.latribune.fr/Tribune/Articles.nsf/Articles?OpenView&Count=99&Expand=5#5 # France
AddURL: http://www.latribune.fr/Tribune/Articles.nsf/Articles?OpenView&Count=99&Expand=6#6 # International
AddURL: http://www.latribune.fr/Tribune/Articles.nsf/Articles?OpenView&Count=99&Expand=7#7 # Marches
AddURL: http://www.latribune.fr/Tribune/Articles.nsf/Articles?OpenView&Count=99&Expand=8#8 # Multimedia
Name: LaTribune
Levels: 2
StoryURL: http://www.latribune.fr/Tribune/Articles.nsf/ArticlesWeb/.*
ContentsStart: Condenser la liste
# ContentsEnd:
StoryStart: Accueil</A> >
StoryEnd: EN SAVOIR PLUS
# ContentsCacheable: 0
le_monde_full.site:
# Le Monde - electronic edition
# Organised by sections and subsections (without the dossiers)
#
# Site extraction settings for Sitescooper
# (http://jmason.org/software/sitescooper/)
# Updates published at http://avm.online.fr
# 15-02-2000, by Jacques Turbé
URL: http://www.lemonde.fr/sequence/0,2319,2031,00.html
AddURL: http://www.lemonde.fr/sequence/0,2319,2037,00.html
AddURL: http://www.lemonde.fr/sequence/0,2319,2079,00.html
AddURL: http://www.lemonde.fr/sequence/0,2319,2030,00.html
AddURL: http://www.lemonde.fr/sequence/0,2319,2075,00.html
AddURL: http://www.lemonde.fr/sequence/0,2319,2058,00.html
AddURL: http://www.lemonde.fr/sequence/0,2319,2061,00.html
AddURL: http://www.lemonde.fr/sequence/0,2319,2204,00.html
AddURL: http://www.lemonde.fr/sequence/0,2319,2044,00.html
AddURL: http://www.lemonde.fr/sequence/0,2319,2070,00.html
AddURL: http://www.lemonde.fr/sequence/0,2319,2065,00.html
AddURL: http://www.lemonde.fr/sequence/0,2319,2064,00.html
AddURL: http://www.lemonde.fr/sequence/0,2319,2077,00.html
AddURL: http://www.lemonde.fr/sequence/0,2319,2045,00.html
AddURL: http://www.lemonde.fr/sequence/0,2319,2033,00.html
AddURL: http://www.lemonde.fr/sequence/0,2319,2083,00.html
AddURL: http://www.lemonde.fr/sequence/0,2319,2192,00.html
AddURL: http://www.lemonde.fr/sequence/0,2319,2199,00.html
Name: le_monde_edition_electronique
Levels: 2
ContentsStart: <td width=510 valign="top">
ContentsEnd: <td width=125 valign="top">
ContentsCachable: 0
StoryStart: td valign="top" width="500"
StoryEnd: >Droits de <
StoryURL: http://www.lemonde.fr/article/.*\.html
multimedium.site:
#Multimédium - Actualité du jour # Paramètres d'extraction de site pour Sitescooper # (http://jmason.org/software/sitescooper/) # par: Philippe Renard hébergé par http://avm.online.fr # Mise à jour: 2000-02-01 URL: http://www.mmedium.com Name: Multimédium Levels: 2 ContentsStart: <!---Actualit ContentsEnd: <!-- fin de la table nouvelles --> ContentsCachable: 1 # StoryURL: http://www.journaldunet.com/\d+/.+\.shtml StoryStart: BUT CONTENU --> StoryEnd: <!-- FIN CONTENU --> StoryCacheable: 1
nouvelobs.site:
# Le Nouvel Observateur # Script de site pour Sitescooper (http://jmason.org/software/sitescooper/) # le 08.11.99 par Jacques Turbé # Mises à jour sur http://www.multimania.com/avm/palm.html URL: http://www.nouvelobs.com/edito/index.html AddURL: http://www.nouvelobs.com/index.html AddURL: http://www.nouvelobs.com/guide/index.html AddURL: http://www.nouvelobs.com/livres/index.html AddURL: http://www.nouvelobs.com/cinemusic.html Name: Le Nouvel Observateur Levels: 2 ContentsStart: <TD width="31%" valign="top"> ContentsEnd: Mode</A> ContentsCacheable: 0 StoryURL: http://www.nouvelobs.com/.*\.html StoryStart: <DATE>SEMAINE StoryEnd: </TEXTE> StoryCachable: 0
de_sz.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001 URL: http://www.sueddeutsche.de/sz/rechts.php Name: Sueddeutsche Description: Süddeutsche Zeitung - wichtigste Ressorts Levels: 3 SizeLimit: 800 IssueLinksStart: <!--zeitungskasten--> IssueLinksEnd: (<!--/zeitungskasten-->|<a href="http://vertrieb.sueddeutsche.de/) ContentsURL: /aktuell/sz/ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } 
IssueHTMLPreProcess: { s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_bayern.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001 URL: http://www.sueddeutsche.de/aktuell/sz/ressort13.php Name: SZ Bayern Description: Bayernteil der Süddeutschen Zeitung Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_berlin.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001 URL: http://www.sueddeutsche.de/aktuell/sz/ressort5.php Name: SZ Berlin Description: Berlin-Seite der Süddeutschen Zeitung Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_beruf.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001 URL: http://www.sueddeutsche.de/aktuell/sz/ressort22.php Name: SZ Bildung & Beruf Description: Beilage Bildung & Beruf der Süddeutschen Zeitung Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_drei.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001 URL: http://www.sueddeutsche.de/aktuell/sz/ressort3.php Name: SZ Seite Drei Description: Seite 3 (Leitartikel) der Süddeutschen Zeitung Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_feuilleton.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001 URL: http://www.sueddeutsche.de/aktuell/sz/ressort7.php Name: SZ Feuilleton Description: Feuilleton der Süddeutschen Zeitung Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_hochschule.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001 URL: http://www.sueddeutsche.de/aktuell/sz/ressort18.php Name: SZ Hochschulseite Description: Hochschulseite der Süddeutschen Zeitung - dienstags Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_immobilien.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001 URL: http://www.sueddeutsche.de/aktuell/sz/ressort23.php Name: SZ Immobilienseite Description: Immobilienseite der Süddeutschen Zeitung Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_kultur.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001 URL: http://www.sueddeutsche.de/aktuell/sz/ressort8.php Name: SZ Münchner Kultur Description: Ressort Münchner Kultur der Süddeutschen Zeitung Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_literatur.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001 URL: http://www.sueddeutsche.de/aktuell/sz/ressort19.php Name: SZ Literatur Description: Literaturbeilage der Süddeutschen Zeitung Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_medien.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001 URL: http://www.sueddeutsche.de/aktuell/sz/ressort9.php Name: SZ Medien Description: Ressort Medien der Süddeutschen Zeitung Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_meinung.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001 URL: http://www.sueddeutsche.de/aktuell/sz/ressort4.php Name: SZ Meinungsseite Description: Meinungsseite (Seite 4) der Süddeutschen Zeitung Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_muenchen.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001 URL: http://www.sueddeutsche.de/aktuell/sz/ressort12.php Name: SZ München Description: München-Teil der Süddeutschen Zeitung Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_panorama.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001 URL: http://www.sueddeutsche.de/aktuell/sz/ressort6.php Name: SZ Panorama Description: Panorama der Süddeutschen Zeitung Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_politik.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001 URL: http://www.sueddeutsche.de/aktuell/sz/ressort2.php Name: SZ Politik Description: Ressort Politik der Süddeutschen Zeitung Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_reise.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001 URL: http://www.sueddeutsche.de/aktuell/sz/ressort17.php Name: SZ Reise & Erholung Description: Beilage Reise & Erholung der Süddeutschen Zeitung - mittwochs Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_sonder.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001 URL: http://www.sueddeutsche.de/aktuell/sz/ressort14.php Name: SZ Sonderseiten Description: Sonderseiten der Süddeutschen Zeitung Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_sonderbeilage.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001 URL: http://www.sueddeutsche.de/aktuell/sz/ressort24.php Name: SZ Sonderbeilage Description: Sonderbeilage der Süddeutschen Zeitung Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_sport.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001 URL: http://www.sueddeutsche.de/aktuell/sz/ressort11.php Name: SZ Sport Description: Sportteil der Süddeutschen Zeitung Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_streiflicht.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001 URL: http://www.sueddeutsche.de/aktuell/sz/ressort1.php Name: SZ Streiflicht Description: Titelseiten-Glosse der Süddeutschen Zeitung Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_verkehr.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001 URL: http://www.sueddeutsche.de/aktuell/sz/ressort20.php Name: SZ Auto & Verkehr Description: Beilage Auto & Verkehr der Süddeutschen Zeitung Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_wirtschaft.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001 URL: http://www.sueddeutsche.de/aktuell/sz/ressort10.php Name: SZ Wirtschaft Description: Ressort Wirtschaft der Süddeutschen Zeitung Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_wissen.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001 URL: http://www.sueddeutsche.de/aktuell/sz/ressort16.php Name: SZ Wissenschaft Description: Wissenschaftsbeilage der Süddeutschen Zeitung - dienstags Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_sz_wochenende.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Peter Marschall, Version 0.5, 6.6.2001 URL: http://www.sueddeutsche.de/aktuell/sz/ressort21.php Name: SZ am Wochenende Description: Wochenendbeilage der Süddeutschen Zeitung - samstags Levels: 2 ContentsURL: (?:http://www.sueddeutsche.de/aktuell/sz/)?ressort\d{1,2}.php ContentsStart: <!--insertion:inhalt--> ContentsEnd: <!--insert_ende:inhalt--> StoryURL: /aktuell/sz/artikel\d{1,6}.php StoryHeadline: <!--insertion:titel-->((?s).*?)<!--insert_ende:titel--> StoryStart: <!--insert_ende:dachtitel--> StoryEnd: <!--insert_ende:inhalt--> StoryLifetime: 2 StoryHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/^.*?<SPAN CLASS="Headline">\s*<!--insertion:titel-->\s*(.*?)\s*<!--insert_ende:titel--><BR>\s*<\/SPAN>/<H2 CLASS="Headline">$1<\/H2>/s; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*<!--insertion:untertitel-->(.*?)\s*<!--insert_ende:untertitel--><BR>\s*<\/SPAN>/<STRONG CLASS="LaufAuszeichnung">$1<\/STRONG><BR>/s; s/\s*<SPAN CLASS="Lauftext">\s*<!--insertion:inhalt-->\s*(?i:Von\s+)?(.*?)\s*<P/<EM>$1<\/EM>\n<P/s; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } ContentsHTMLPreProcess: { s/<IMG ALT="" BORDER="0" HEIGHT="\d+" SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+"><BR>//gs; s/<IMG SRC="..\/pics_zeitung\/trans\.gif" WIDTH="\d+" HEIGHT="\d+" ALT="" BORDER="0"><BR>//gs; s/\s*<SPAN CLASS="LaufAuszeichnung">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<EM CLASS="LaufAuszeichnung">$1<\/EM><BR>\n/gs; s/\s*<SPAN CLASS="Headline">\s*(.*?)\s*<BR>\s*<\/SPAN>\s*/<STRONG><FONT SIZE="6" CLASS="Headline">$1<\/FONT><\/STRONG>\n/gs; s/<SPAN CLASS="Lauftext">\s*(.*?)\s*<BR>\s*<\/SPAN>/<BR>$1\n/gs; s/<SPAN CLASS="Infoschrift">.*?<\/a><BR>\s*<\/SPAN>/<\/a><P>\n/gs; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_cert.site:
# de_cert.site # This is a sitescooper site file. see http://sitescooper.org/ # by Stefan Schwingeler, Version 0.2, 12.2.03 URL: http://cert.uni-stuttgart.de/ticker/sidebar.php Description: German CERT Infos AuthorName: Stefan Schwingeler AuthorEMail: stobs /at/ web . de Name: CERT RUS Levels: 2 StoryURL: http://cert.uni-stuttgart.de/ticker/article.php\?mid=\d+ StoryStart: <FONT SIZE="+2"> StoryEnd: Copyright © 2003 RUS-CERT, Universität Stuttgart # remove CENTER StoryPostProcess: { s/center//gi; }
de_computerwoche.site:
# This is a sitescooper site file. see http://sitescooper.tsx.org/ # by Stefan Schwingeler, Version 0.2, 27.10.1999 URL: http://www.computerwoche.de/info-point/top-news/main.cfm?o=1 Name: Computerwoche Levels: 2 ContentsStart: <TD WIDTH="100%" VALIGN="TOP"> ContentsEnd: Copyright.*Computerwoche Verlag GmbH ContentsCachable: 0 ContentsDiff: 0 StoryURL: http://www.computerwoche.de/info-point/top-news/details.cfm\?id=\d+ StoryStart: </TABLE><br clear=all> StoryEnd: <a href=\"drucken.cfm StoryCachable: 0 StoryPostProcess: { s/<b> *Drucken<\/font><\/b>//gm; }
de_cyberkino.site:
# de_cyberkino.site # This is a sitescooper site file. see http://sitescooper.org/ # by Stefan Schwingeler, Version 0.2, 7.5.03 URL: http://www.cyberkino.de/entertainment/kino/monate.html Description: German Cinema Infos AuthorName: Stefan Schwingeler AuthorEMail: stobs /at/ web . de ContentsDiff: 1 ImageURL: http://www.cyberkino.de/entertainment/kino/.*\.jpg Name: Cyberkino Levels: 2
de_der_pocketstandard.site:
URL: http://derstandard.at/Palm/Titel.htm Name: Der PocketStandard Levels: 3 ContentsPrint: 1 IssuePrint: 1 ImageURL: http://.* # # This site was converted from an AvantGo .subs file by subs-to-site.pl. # See http://sitescooper.org/ for more information on sitescooper.
de_fool.site:
# de_fool.site # This is a sitescooper site file. see http://sitescooper.org/ # by Rodrigo A. Batista, Version 0.1, 13.12.2000 URL: http://www.fool.de/ThemenderWoche/ThemenderWoche.htm Name: MotleyFool DE Description: Weekly News from the German 'The Motley Fool' (financial site) Levels: 2 ContentsStart: <BODYTEXT> ContentsEnd: <\/BODYTEXT> ContentsCachable: 0 ContentsDiff: 1
de_gazette.site:
# de_gazette.site # This is a sitescooper site file. see http://sitescooper.org/ # by Stefan Schwingeler, Version 0.1, 6.2.03 URL: http://gazette.de/ Name: Die Gazette Description: German politics magazine AuthorName: Stefan Schwingeler AuthorEMail: stobs /at/ web . de Levels: 2 ImageURL: .*.jpg StoryURL: [A-Za-z]\S+\.html ContentsStart: bordercolor="#CCCCCC" ContentsEnd: /Archiv/Newsletter.html StoryToPrintableSub: { s,([A-Z].+)(\.html),\1-print\2, s,[A-Z],[a-z], } StoryPostProcess: { s/<center>//gi; }
de_gnn.site:
# This is a sitescooper site file. see http://sitescooper.tsx.org/ # by Stefan Schwingeler, Version 0.2, 27.10.1999 URL: http://www.gnn.de Name: GNN Levels: 2 StoryURL: http://www.gnn.de/99\d\d/\d+-..\.html ContentsStart: <!-- Special --> ContentsEnd: </HTML> StoryStart: <FONT SIZE=-1> StoryEnd: Meldung als eMail verschicken
de_heise.site:
# This is a sitescooper site file. see http://sitescooper.tsx.org/ # by Stefan Schwingeler, Version 0.2, 27.10.1999 # Modified to correct story titles and to delete links at the end of each story # by Peter Marschall, Version 0.4, 6.6.2001 URL: http://www.heise.de/pda/newsticker/ Name: Heise Newsticker Levels: 2 MinPages: 2 ContentsStart: <\/HEISEADVERT> ContentsEnd: <\/HTML> ContentsCachable: 0 ContentsDiff: 1 StoryURL: http://www.heise.de/pda/newsticker/m\d+\.html StoryStart: <HTML> StoryEnd: </HTML> StoryCacheable: 1 StoryHeadline: <HEISETEXT>\n<B>(.*?)<\/B> StoryHTMLPreProcess: { s/(?:\s*\/\s+)?<P><HR SIZE=1 NOSHADE><B>URL dieses Artikels:<\/B>.*?<P>Copyright 200\d by Verlag Heinz Heise//igs; s/<HEISETEXT>.<B>(.*?)<\/B>/<HEISETEXT><H2>$1<\/H2>/s; }
de_heise_mobil.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Stefan Schwingeler, Version 0.3, 23.5.03 URL: http://heise.de/mobil/ Name: Heise Mobil Description: German Heise Mobil-news Levels: 2 # Ticker ignorieren: ContentsStart: Themen ContentsEnd: <!-- MITTE+RECHTS --> ContentsCachable: 0 ContentsDiff: 1 StoryURL: http://heise.de/mobil/.*/ StoryStart: <HEISETEXT> StoryEnd: </HEISETEXT> StoryCacheable: 1 ImageURL: http://heise.de/mobil/artikel/.*/aufmacher\.jpg # remove small font commands StoryPostProcess: { s/<font size=1>//gi; }
de_heise_tp.site:
# This is a sitescooper site file. see http://sitescooper.tsx.org/ # by Stefan Schwingeler and Carsten Clasohm, Version 0.3, Thu Aug 03 19:07:32 2000 # minor correction to "StoryToPrintableSub" URL: http://www.heise.de/tp/ Name: Heise Telepolis Levels: 2 ContentsStart: </CONTENTBANNER> ContentsEnd: alt="top of page" ContentsDiff: 1 StoryURL: http://www.heise.de/(tp/.*/\d+/\d.html|bin/tp/issue/dl-artikel.*) StoryCacheable: 1 StoryLifetime: 2 StoryToPrintableSub: s,/tp/.*/(\d+)/\d\.html,/bin/tp/issue/dl-artikel.cgi?artikelnr=\1&rub_ordner=inhalt&mode=html, StoryPostProcess: { s/<font size="\+1">([^<]+)<\/font>/<b>$1<\/b>/sgi; s/<font size="\+2" ?>([^<]+)<\/font><br>/<h2>$1<\/h2>/sgi; }
de_onlinekosten.site:
# This is a sitescooper site file. see http://sitescooper.tsx.org/ # by Larsen Wulff, http://www.multimediaconnection.de/palmpilot, Date: 28.05.2000 # onlinekosten.de URL: http://www.onlinekosten.de Name: Onlinekosten.de Description: Informationen und Preise rund ums Netz Levels: 2 ContentsStart: <!-- MAIN_CONTENT_TABLE START --> ContentsEnd: <!-- MAIN_CONTENT_TABLE ENDE --> ContentsDiff: 1 StoryURL: http://www.onlinekosten.de/news/tt.\d+.html StoryURL: http://www.onlinekosten.de/news/artikel.* StoryDiff: 1
de_pdassi_news.site:
# de_pdassi_news.site # This is a sitescooper site file. see http://sitescooper.org/ # by Stefan Schwingeler, Version 0.2, 4.3.03 URL: http://pdassi.de/news1.php Name: pdassi News Description: German Palm site AuthorName: Stefan Schwingeler AuthorEMail: stobs /at/ web . de Levels: 2 ImageURL: http://pdassi.de/images/.* StoryToPrintableSub: s/SID=[a-z0-9]+/SID=1/ StoryPostProcess: { s/<small>//gi; }
de_pdassi_software.site:
# de_pdassi_software.site # This is a sitescooper site file. see http://sitescooper.org/ # by Stefan Schwingeler, Version 0.2, 4.3.03 URL: http://pdassi.de/wcf/newuploads.php AddURL: http://pdassi.de/wcf/newupdates.php AddURL: http://pdassi.de/wcf/newprc.php Name: pdassi Software Description: German Palm site AuthorName: Stefan Schwingeler AuthorEMail: stobs /at/ web . de Levels: 2 ImageURL: http://.*/util/screenshot.php\?pid=\d+.* StoryToPrintableSub: s/SID=[a-z0-9]+/SID=1/ StoryPostProcess: { s/align="center"//gi; s/<small>//gi; }
de_spiegel.site:
# de_Spiegel.site # This is a sitescooper site file. see http://sitescooper.cx/ # by Stefan Schwingeler, Version 0.6, 6.2.03 # History: # "fixed" by Larsen Wulff, Larsen@multimediaconnection.de # rewritten with new PDA-link (no pics) by Stefan Schwingeler URL: http://www.spiegel.de/dertag/pda/avantgo/0,1958,r20=1@r21=1@r23=1@r10=1@r22=1@r24=1@r19=1@r139=1@r140=1,00.html Name: Der Spiegel Description: German news magazine AuthorName: Stefan Schwingeler AuthorEMail: stobs /at/ web . de Levels: 2 StoryURL: http://www.spiegel.de/dertag/pda/avantgo/artikel/.*\.html
de_stern.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Stefan Schwingeler, Version 0.1, 10.12.2002 URL: http://www.stern.de/pda/ #AddURL: http://www.stern.de/pda/?pda=1&rubrik=politik #AddURL: http://www.stern.de/pda/?pda=1&rubrik=wirtschaft #AddURL: http://www.stern.de/pda/?pda=1&rubrik=sport #AddURL: http://www.stern.de/pda/?pda=1&rubrik=kultur #AddURL: http://www.stern.de/pda/?pda=1&rubrik=computer #AddURL: http://www.stern.de/pda/?pda=1&rubrik=campus #AddURL: http://www.stern.de/pda/?pda=1&rubrik=wissenschaft #AddURL: http://www.stern.de/pda/?pda=1&rubrik=lifestyle Name: Stern Levels: 2 ImageURL: .*\.jpg # ContentsURL: http://www.stern.de/pda/\?pda=1\&rubrik=.* ContentsStart: <strong>Lifestyle</strong> ContentsEnd: <!-- FOOTER START --> StoryURL: http://www.stern.de/.*/index.html\?id=\d+\&pda=1 StoryStart: Beginn des Artikels StoryEnd: <!-- FOOTER START --> # StoryHeadline: <div id="artikelKopf1">(.*?)</div> StoryHeadline: <h1>(.*?)</h1>
de_tagesschau.site:
# de_tagesschau.site # This is a sitescooper site file. see http://sitescooper.org/ # by Stefan Schwingeler, Version 0.1, 6.2.03 URL: http://www.tagesschau.de/mobileTS Name: Tagesschau Mobil Description: German news show AuthorName: Stefan Schwingeler AuthorEMail: stobs /at/ web . de Levels: 3 ImageURL: .*/image/.*\.jpg SizeLimit: 1000 Level2Cachable: 0 Level3Cachable: 0 Level4Cachable: 0 ContentsCachable: 0
de_tecchannel.site:
# History: # 1/11/2002: Initial version. URL: http://www.tecchannel.de/mobile/pda/ Name: tecChannel Description: IT News Levels: 2 StoryURL: /mobile/pda/[0-9]+/ ContentsStart: - News</font></b><br><br> StoryEnd: Zurück zur Übersicht ContentsHTMLPreProcess: { s/<hr[^>]*>//gmi; s/<font size="1">- //gmi; s/<\/?font[^>]*>//gmi; s/<\/a><br>/<\/a><br><br>/gmi; s/<b> - /<b>/gmi; s/<br>\s*<br>\s*<br>/<br><br>\n/gmi; } StoryHTMLPreProcess: { s/<\/?font[^>]*>//gmi; s/<b><a[^>]*>$//gmi; s/(<br>\s*){3,}/<br><br>/gmi; } AuthorName: Michael Schubart AuthorEmail: michael@schubart.net
de_teltarif.site:
# This is a sitescooper site file. see http://sitescooper.tsx.org/ # by Stefan Schwingeler, Version 0.2, Sun Jul 16 11:32:59 2000 URL: http://www.teltarif.de/arch/woche.html Name: Teltarif Levels: 2 ContentsStart: <!-- Add Ad End --> StoryURL: http://www.teltarif.de/arch/\d\d\d\d/kw\d+/s\d+\.html StoryStart: <!-- Add Ad End --> StoryEnd: Ihre Meinungen und Erfahrungen
de_tvspielfilm.site:
# This is a sitescooper site file. see http://sitescooper.tsx.org/ # by Stefan Schwingeler and Carsten Clasohm, Version 1.0, 17.2.2000 # modified by Stefan Schwingeler 25.04.01 11:26: UseTableSmarts: 0 URL: http://www.tomorrow-newmedia.de/mobile/avantgo/tvs/tipps.php # http://www.tomorrow-newmedia.de/mobile/avantgo/tvs/ #http://www.tvspielfilm.de/comm4/tvsdbdumps/besten_filme_kontakter.html #http://www.tvspielfilm.de/comm4/tvsdbdumps/alle_spielfilme_heute.html #http://www.tvspielfilm.de/comm4/tvsdbdumps/data_21_00.html # AddURL: http://www.tvspielfilm.de/comm4/tvsdbdumps/data_23_00.html Name: TV-Spielfilm Levels: 2 ContentsDiff: 0 StoryCachable: 0 StoryURL: http://www.tomorrow-newmedia.de/mobile/avantgo/tvs/gen/.* ImageURL: .+\.gif StoryUseTableSmarts: 0
de_welt.site:
# This is a sitescooper site file. see http://sitescooper.tsx.org/ # by Stefan Schwingeler, Version 0.1, 24.11.1999 URL: http://www.welt.de/inhalt.htx Name: Die Welt Levels: 2 ContentsStart: Tagesinhalts-Übersicht ContentsEnd: ALLE SEITEN ENDE StoryStart: <meta name="robots" StoryEnd: <P>\ <\/p> StoryURL: http://www.welt.de/daten/\d\d\d\d/\d\d/\d\d/.*\.htx StoryPostProcess: { s/align=center//gmi; s/<center>//gmi; }
de_yahoo.site:
# This is a sitescooper site file. see http://sitescooper.tsx.org/ # by Stefan Schwingeler, Version 0.2, 27.10.1999 URL: http://de.news.yahoo.com/3/ AddURL: http://de.news.yahoo.com/11/ AddURL: http://de.news.yahoo.com/4/ AddURL: http://de.sports.yahoo.com/ AddURL: http://de.news.yahoo.com/33/ AddURL: http://de.news.yahoo.com/2/ AddURL: http://de.news.yahoo.com/41/ AddURL: http://de.news.yahoo.com/9/ Name: Yahoo News DE Levels: 2 ContentsStart: <table cellspacing=0 cellpadding=4> ContentsEnd: <h2>Frühere Meldungen<\/h2> ContentsCachable: 0 StoryURL: http:\/\/de.news.yahoo.com\/99\d+\/\d+\/.*\.html StoryStart: <h2> StoryEnd: <br clear=all> StoryCacheable: 1
mobile2day.site:
# mobile2day.site # This is a sitescooper site file. see http://sitescooper.org/ # by Stefan Schwingeler, Version 0.2, 6.5.03 URL: http://www.mobile2day.de/pdanews_all_palm.html?n_multi=0&nf_id=0&nt_id=3&f_date_m=0&f_date_y=0&t_date_m=0&t_date_y=0&s_text=&isLimit=1 Name: mobile2day Description: German PDA-News AuthorName: Stefan /at/ Schwingeler.de ContentsDiff: 1 Levels: 2 StoryPostProcess: { s/<CENTER>//gi; s/size=\"1\"//gi; }
palmfaq_de.site:
URL: http://palmfaq.de Name: PalmFAQ.de Levels: 2 ContentsDiff: 1 StoryCacheable: 1
pda_debitel_net.site:
URL: http://pda.debitel.net/ Name: debitel.net Mobile Portal Levels: 4 ImageURL: .*\.(gif|jpg) SizeLimit: 1000
windows2000faq.site:
# windows2000faq.site # This is a sitescooper site file. see http://sitescooper.org/ # by Stefan Schwingeler, Version 0.1, 6.2.03 # this was: ntfaq.site URL: http://www.windows2000faq.com/Articles/Index.cfm?Action=New Name: Windows2000 FAQ AuthorName: Stefan Schwingeler AuthorEMail: stobs /at/ web . de Levels: 2 ContentsStart: New FAQs in the last 30 days ContentsEnd: <--   ContentsDiff: 1 StoryURL: http://www.windows2000faq.com/Articles/Index.cfm\?ArticleID=\d+ StoryStart: CLASS="title" StoryEnd: <b>Related Articles</b> StoryCachable: 1 StoryPostProcess: { s/<CENTER>//gi; }
zdnet_news.site:
# This is a sitescooper site file. see http://sitescooper.org/ # by Stefan Schwingeler, Version 0.1, Date: 030214 URL: http://feeds.zdnet.co.uk/avantgo/news/breaking/ Name: ZDNet News Levels: 2
de_zeit.site:
# This is a sitescooper site file. See http://sitescooper.tsx.org # by Carsten Clasohm, version 1.0, 19.11.1999 # Modified for new style and extended to a 3 level site # by Peter Marschall, version 1.1, 6.6.2001 URL: http://www.zeit.de/ Name: Zeit Description: Wochenzeitung Die Zeit Levels: 3 SizeLimit: 800 # Uncomment the next two lines to get images too (width setting is for iSilo) #ImageURL: (?:http://www.zeit.de/)?bilder/\d{4}/\d{1,2}.*?\.(?:jpg|png|gif) #ImageScaleToMaxWidth: 152 IssueUseTableSmarts: 0 IssueLinksStart: <!-- Sekundaernavigation Anfang -+ --> IssueLinksEnd: IN DER ZEIT ContentsURL: http://www.zeit.de/(?:politik|wirtschaft|kultur|wissen|media|reisen|leben)/ ContentsStart: <!-- Teaser Reise, Leben, Kulturkalender --> ContentsEnd: <!-- rechte spalte anfang --> StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Politik/\d{5,6}_.*?\.html StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Wirtschaft/\d{5,6}_.*?\.html StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Kultur/\d{5,6}_.*?\.html StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Wissen/\d{5,6}_.*?\.html StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Media/\d{5,6}_.*?\.html StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Reisen/\d{5,6}_.*?\.html StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Leben/\d{5,6}_.*?\.html StoryStart: <b class=.xs.> StoryEnd: (<!-- /Bibliographie -->|\(c\) DIE ZEIT) StoryHeadline: class=.subart.>(?s:.*?-->.*?-->\s*)(.*)<\/font><br> StoryCacheable: 1 # Die Zeit comes weekly on thursdays - ignore it on other days #EvaluatePerl: { # $skip_site = (localtime()[6] != 4); #} ContentsHTMLPreProcess: { s/<!-- Spitzmarke Anfang -+-->.*?<!-- Spitzmarke Ende -+-->\s*//gs; s/(<br>\s*)+<br>/<br>/isg; s/<font [^>]+class="snippletTm".*?>.*?<\/font>//sg; s/<a [^>]+class="mehr".*?>\[Diesen Artikel kommentieren\]<\/a>//sg; s/<b>\s*>{1,2}<\/b>//sg; s/»//sg; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } StoryHTMLPreProcess: { s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; s/<div.*?>//isg; s/<\/div.*?>/<P>/isg; 
s/>\s*Fotos?[:].*?</></gs; } IssueHTMLPreProcess: { s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; }
de_zeit_alternate.site:
# This is a sitescooper site file # by Andreas Mittler, version 1.1, 27.03.2001 URL: http://www.zeit.de/ AddURL: http://www.zeit.de/politik AddURL: http://www.zeit.de/wirtschaft AddURL: http://www.zeit.de/kultur AddURL: http://www.zeit.de/wissen AddURL: http://www.zeit.de/media AddURL: http://www.zeit.de/reisen AddURL: http://www.zeit.de/leben Name: Zeit Levels: 2 StoryURL: http://www.zeit.de/\d+/\d+/.+/\d+_.+\.html StoryStart: <br><br> StoryCacheable: 1
de_zeit_kultur.site:
# This is a sitescooper site file. See http://sitescooper.tsx.org # by Peter Marschall, version 1.1, 6.6.2001 URL: http://www.zeit.de/kultur/ Name: Zeit Kultur Description: Wochenzeitung Die Zeit - Ressort Kultur Levels: 2 # Uncomment the next two lines to get images too (width setting is for iSilo) #ImageURL: (?:http://www.zeit.de/)?bilder/\d{4}/\d{1,2}.*?\.(?:jpg|png|gif) #ImageScaleToMaxWidth: 152 ContentsCacheable: 0 ContentsStart: <!-- Teaser Reise, Leben, Kulturkalender --> ContentsEnd: <!-- rechte spalte anfang --> StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Kultur/\d{5,6}_.*.html StoryStart: <b class=.xs.> StoryEnd: (<!-- /Bibliographie -->|\(c\) DIE ZEIT) StoryHeadline: class=.subart.>(?s:.*?-->.*?-->\s*)(.*)<\/font><br> StoryCacheable: 1 # Die Zeit comes weekly on thursdays - ignore it on other days #EvaluatePerl: { # $skip_site = (localtime()[6] != 4); #} ContentsHTMLPreProcess: { s/<!-- Spitzmarke Anfang -+-->.*?<!-- Spitzmarke Ende -+-->\s*//gs; s/(<br>\s*)+<br>/<br>/isg; s/<font [^>]+class="snippletTm".*?>.*?<\/font>//sg; s/<a [^>]+class="mehr".*?>\[Diesen Artikel kommentieren\]<\/a>//sg; s/<b>\s*>{1,2}<\/b>//sg; s/»//sg; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } StoryHTMLPreProcess: { s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; s/<div.*?>//isg; s/<\/div.*?>/<P>/isg; s/>\s*Fotos?[:].*?</></gs; }
de_zeit_leben.site:
# This is a sitescooper site file. See http://sitescooper.tsx.org # by Peter Marschall, version 1.1, 6.6.2001 URL: http://www.zeit.de/leben/ Name: Zeit Leben Description: Wochenzeitung Die Zeit - Ressort Leben Levels: 2 # Uncomment the next two lines to get images too (width setting is for iSilo) #ImageURL: (?:http://www.zeit.de/)?bilder/\d{4}/\d{1,2}.*?\.(?:jpg|png|gif) #ImageScaleToMaxWidth: 152 ContentsCacheable: 0 ContentsStart: <!-- Teaser Reise, Leben, Kulturkalender --> ContentsEnd: <!-- rechte spalte anfang --> StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Leben/\d{5,6}_.*.html StoryStart: <b class=.xs.> StoryEnd: (<!-- /Bibliographie -->|\(c\) DIE ZEIT) StoryHeadline: class=.subart.>(?s:.*?-->.*?-->\s*)(.*)<\/font><br> StoryCacheable: 1 # Die Zeit comes weekly on Thursdays - ignore it on other days #EvaluatePerl: { # $skip_site = ((localtime())[6] != 4); #} ContentsHTMLPreProcess: { s/<!-- Spitzmarke Anfang -+-->.*?<!-- Spitzmarke Ende -+-->\s*//gs; s/(<br>\s*)+<br>/<br>/isg; s/<font [^>]+class="snippletTm".*?>.*?<\/font>//sg; s/<a [^>]+class="mehr".*?>\[Diesen Artikel kommentieren\]<\/a>//sg; s/<b>\s*>{1,2}<\/b>//sg; s/»//sg; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } StoryHTMLPreProcess: { s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; s/<div.*?>//isg; s/<\/div.*?>/<P>/isg; s/>\s*Fotos?[:].*?</></gs; }
de_zeit_politik.site:
# This is a sitescooper site file. See http://sitescooper.tsx.org # by Peter Marschall, version 1.1, 6.6.2001 URL: http://www.zeit.de/politik/ Name: Zeit Politik Description: Wochenzeitung Die Zeit - Ressort Politik Levels: 2 # Uncomment the next two lines to get images too (width setting is for iSilo) #ImageURL: (?:http://www.zeit.de/)?bilder/\d{4}/\d{1,2}.*?\.(?:jpg|png|gif) #ImageScaleToMaxWidth: 152 ContentsCacheable: 0 ContentsStart: <!-- Teaser Reise, Leben, Kulturkalender --> ContentsEnd: <!-- rechte spalte anfang --> StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Politik/\d{5,6}_.*.html StoryStart: <b class=.xs.> StoryEnd: (<!-- /Bibliographie -->|\(c\) DIE ZEIT) StoryHeadline: class=.subart.>(?s:.*?-->.*?-->\s*)(.*)<\/font><br> StoryCacheable: 1 # Die Zeit comes weekly on Thursdays - ignore it on other days #EvaluatePerl: { # $skip_site = ((localtime())[6] != 4); #} ContentsHTMLPreProcess: { s/<!-- Spitzmarke Anfang -+-->.*?<!-- Spitzmarke Ende -+-->\s*//gs; s/(<br>\s*)+<br>/<br>/isg; s/<font [^>]+class="snippletTm".*?>.*?<\/font>//sg; s/<a [^>]+class="mehr".*?>\[Diesen Artikel kommentieren\]<\/a>//sg; s/<b>\s*>{1,2}<\/b>//sg; s/»//sg; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } StoryHTMLPreProcess: { s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; s/<div.*?>//isg; s/<\/div.*?>/<P>/isg; s/>\s*Fotos?[:].*?</></gs; }
de_zeit_reisen.site:
# This is a sitescooper site file. See http://sitescooper.tsx.org # by Peter Marschall, version 1.1, 6.6.2001 URL: http://www.zeit.de/reisen/ Name: Zeit Reisen Description: Wochenzeitung Die Zeit - Ressort Reisen Levels: 2 # Uncomment the next two lines to get images too (width setting is for iSilo) #ImageURL: (?:http://www.zeit.de/)?bilder/\d{4}/\d{1,2}.*?\.(?:jpg|png|gif) #ImageScaleToMaxWidth: 152 ContentsCacheable: 0 ContentsStart: <!-- Teaser Reise, Leben, Kulturkalender --> ContentsEnd: <!-- rechte spalte anfang --> StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Reisen/\d{5,6}_.*.html StoryStart: <b class=.xs.> StoryEnd: (<!-- /Bibliographie -->|\(c\) DIE ZEIT) StoryHeadline: class=.subart.>(?s:.*?-->.*?-->\s*)(.*)<\/font><br> StoryCacheable: 1 # Die Zeit comes weekly on Thursdays - ignore it on other days #EvaluatePerl: { # $skip_site = ((localtime())[6] != 4); #} ContentsHTMLPreProcess: { s/<!-- Spitzmarke Anfang -+-->.*?<!-- Spitzmarke Ende -+-->\s*//gs; s/(<br>\s*)+<br>/<br>/isg; s/<font [^>]+class="snippletTm".*?>.*?<\/font>//sg; s/<a [^>]+class="mehr".*?>\[Diesen Artikel kommentieren\]<\/a>//sg; s/<b>\s*>{1,2}<\/b>//sg; s/»//sg; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } StoryHTMLPreProcess: { s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; s/<div.*?>//isg; s/<\/div.*?>/<P>/isg; s/>\s*Fotos?[:].*?</></gs; }
de_zeit_wirtschaft.site:
# This is a sitescooper site file. See http://sitescooper.tsx.org # by Peter Marschall, version 1.1, 6.6.2001 URL: http://www.zeit.de/wirtschaft/ Name: Zeit Wirtschaft Description: Wochenzeitung Die Zeit - Ressort Wirtschaft Levels: 2 # Uncomment the next two lines to get images too (width setting is for iSilo) #ImageURL: (?:http://www.zeit.de/)?bilder/\d{4}/\d{1,2}.*?\.(?:jpg|png|gif) #ImageScaleToMaxWidth: 152 ContentsCacheable: 0 ContentsStart: <!-- Teaser Reise, Leben, Kulturkalender --> ContentsEnd: <!-- rechte spalte anfang --> StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Wirtschaft/\d{5,6}_.*.html StoryStart: <b class=.xs.> StoryEnd: (<!-- /Bibliographie -->|\(c\) DIE ZEIT) StoryHeadline: class=.subart.>(?s:.*?-->.*?-->\s*)(.*)<\/font><br> StoryCacheable: 1 # Die Zeit comes weekly on Thursdays - ignore it on other days #EvaluatePerl: { # $skip_site = ((localtime())[6] != 4); #} ContentsHTMLPreProcess: { s/<!-- Spitzmarke Anfang -+-->.*?<!-- Spitzmarke Ende -+-->\s*//gs; s/(<br>\s*)+<br>/<br>/isg; s/<font [^>]+class="snippletTm".*?>.*?<\/font>//sg; s/<a [^>]+class="mehr".*?>\[Diesen Artikel kommentieren\]<\/a>//sg; s/<b>\s*>{1,2}<\/b>//sg; s/»//sg; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } StoryHTMLPreProcess: { s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; s/<div.*?>//isg; s/<\/div.*?>/<P>/isg; s/>\s*Fotos?[:].*?</></gs; }
de_zeit_wissen.site:
# This is a sitescooper site file. See http://sitescooper.tsx.org # by Peter Marschall, version 1.1, 6.6.2001 URL: http://www.zeit.de/wissen/ Name: Zeit Wissen Description: Wochenzeitung Die Zeit - Ressort Wissen Levels: 2 # Uncomment the next two lines to get images too (width setting is for iSilo) #ImageURL: (?:http://www.zeit.de/)?bilder/\d{4}/\d{1,2}.*?\.(?:jpg|png|gif) #ImageScaleToMaxWidth: 152 ContentsCacheable: 0 ContentsStart: <!-- Teaser Reise, Leben, Kulturkalender --> ContentsEnd: <!-- rechte spalte anfang --> StoryURL: http://www.zeit.de/\d{4}/\d{1,2}/Wissen/\d{5,6}_.*.html StoryStart: <b class=.xs.> StoryEnd: (<!-- /Bibliographie -->|\(c\) DIE ZEIT) StoryHeadline: class=.subart.>(?s:.*?-->.*?-->\s*)(.*)<\/font><br> StoryCacheable: 1 # Die Zeit comes weekly on Thursdays - ignore it on other days #EvaluatePerl: { # $skip_site = ((localtime())[6] != 4); #} ContentsHTMLPreProcess: { s/<!-- Spitzmarke Anfang -+-->.*?<!-- Spitzmarke Ende -+-->\s*//gs; s/(<br>\s*)+<br>/<br>/isg; s/<font [^>]+class="snippletTm".*?>.*?<\/font>//sg; s/<a [^>]+class="mehr".*?>\[Diesen Artikel kommentieren\]<\/a>//sg; s/<b>\s*>{1,2}<\/b>//sg; s/»//sg; s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; } StoryHTMLPreProcess: { s/\s*(?:(<br>|<p>)\s*)+(<br>|<p>)/$1/isg; s/<div.*?>//isg; s/<\/div.*?>/<P>/isg; s/>\s*Fotos?[:].*?</></gs; }
freebsd_hu.site:
# FreeBSD.hu # Hubidubi (hubidubi@freemail.hu) URL: http://www.freebsd.hu/pda Name: FreeBSD.hu Levels: 1 StoryDiff: 1 ContentsDiff: 1 # If you don't want the logo, comment this line out! ImageURL: http://www.freebsd.hu/images/.*\.jpg
hup_hu.site:
# HUP.hu # Author: Hubidubi (hubidubi@freemail.hu) URL: http://www.hup.hu/modules.php?name=PDA Name: HUP Levels: 2 # If you don't want the logo, comment this line out! ImageURL: http://www.hup.hu/images/.*\.png StoryPostProcess: { s/Dátum//gm; s/Cím//gm; }
linux_hu.site:
# Linux.hu # Author: Hubidubi (hubidubi@freemail.hu) URL: http://www.linux.hu Name: Linux.hu StoryEnd: Ha te is olvastál StoryPostProcess: { s/" WIDTH=1 HEIGHT=1>//gm; s/Ha te is olvastál.*k!//gm; }
linuxforum_hu.site:
# Linuxforum.hu # Author: Hubidubi (hubidubi@freemail.hu) URL: http://www.linuxforum.hu/modules.php?name=AvantGo Name: Linuxforum Levels: 2 # If you don't want the logo, comment this line out! ImageURL: http://www.linuxforum.hu/images/.*\.gif StoryPostProcess: { s/Date//gm; s/Cím//gm; }
linuxonline_hu.site:
# LinuxOnline.hu # Author: Hubidubi (hubidubi@freemail.hu) URL: http://www.linuxonline.hu/modules.php?name=AvantGo Name: LinuxOnline Levels: 2 # If you don't want the logo, comment this line out! ImageURL: http://www.linuxonline.hu/images/.*\.gif StoryPostProcess: { s/Date//gm; s/Cím//gm; }
metro_hu.site:
# Metro.hu # Author: Hubidubi (hubidubi@freemail.hu) URL: http://pda.metro.hu/index.html Name: Metro Levels: 3
pdamania_hu.site:
# PDAMania # Author: Hubidubi (hubidubi@freemail.hu) URL: http://www.pdamania.hu/services/pdaedition Name: PDAMania.hu Levels: 3 StoryStart: <b>::
terminal_hu.site:
# terminal.hu is a Hungarian internet news file. # Author: dlux (dlux@kapu.hu) # Distributed as part of the sitescooper package URL: http://www.terminal.hu/ Name: Terminal.hu Levels: 2 StoryStart: End Ad StoryEnd: olvasó doboz StoryURL: http://www.terminal.hu/cikk\.php3\?id=\d+ StoryHeadline: <TITLE>.*?([^:]*?)</TITLE>
accuweather_dublin.site:
# Accuweather Dublin # you'll have to go to Accuweather site, find your city and # copy url to the line below URL: http://www.accuweather.com/adcbin/intlocal_index?wxcity2=DUBLIN&wxcountry=EU;IE AuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr> Name: Accuweather Dublin Levels: 1 # other details are picked up automatically from ../lib/layouts.site
evilgerald.site:
URL: http://www.evilgerald.com/ Name: The Evil Gerald Levels: 2 ContentsUseTableSmarts: 0 StoryURL: http://www.evilgerald.com/Issues/Issue\d+/\S+
hackwatch.site:
URL: http://www.iol.ie/~kooltek/welcome.html Name: Hack Watch News Description: hacks, internet news, telecoms, digital TV, crypto and more, by John McCormac Levels: 1 StoryStart: Last Updated At:[^<]+ StoryEnd: -- Begin Current News Headlines -- StoryDiff: 1
irish_aertel_listings.site:
URL: http://www.rte.ie/aertel/P171.HTM Name: Aertel TV Listings StoryStart: <font face="Verdana, Arial, Helvetica, sans-serif" size="3" color=".FFFFFF">TV TODAY</font></td> StoryEnd: <-- Previous Page</a> UseTableSmarts: 0 AddURL: http://www.rte.ie/aertel/P172.HTM AddURL: http://www.rte.ie/aertel/P173.HTM AddURL: http://www.rte.ie/aertel/P174.HTM AddURL: http://www.rte.ie/aertel/P175.HTM AddURL: http://www.rte.ie/aertel/P176.HTM StoryPostProcess: { s/^\s+//gm; s/^(\d)/\n$1/gm; s/^(Please reload |MAIN INDEX |MAIN NEWS INDEX ).*$//gm; s/^(TV Extra |ENTERTAINMENT INDEX |Last Updated: ).*$//gm; s/^(NEWS HEADLINES ).*$//gm; s/\n\s*\n+/\n\n/gs; s/<hr align="left" size="1" width="350" [^>]+>//gs; }
linux_ie.site:
URL: http://www.linux.ie/ Name: Linux.ie Description: Home of the Irish Linux User Group # Thanks to Ken Guest, ILUG webmaster guy, for adding some sitescooper-friendly # comments to the page. Hopefully this'll be HTML-spring-clean proof for quite # a while! Levels: 2 ContentsStart: <b>Linux.ie Homepage</b><BR><BR> ContentsEnd: <a href="http://www.indigo.ie/"><img src="/images/indigopower.gif" border="0"></a> ContentsPrint: 1 TableRender: flatten StoryURL: /(reviews|tutorials|articles)/.*\.html StoryStart: <TABLE BORDER=0 CELLSPACING=0 CELLPADDING=0 WIDTH="100%"> StoryEnd: </HTML>
rte_news_online.site:
URL: http://www.rte.ie/news/ Name: RTE News Online Levels: 2 Description: News from RTE, Ireland's national broadcaster ContentsStart: DO NOT REMOVE COMMENTS BELOW THIS LINE ContentsEnd: © \d+ RTÉ News & RTÉ Online StoryURL: http://www.rte.ie/news/[[YYYY]]/\d+/\S+.html StorySkipURL: .*/(morningireland|1news|newsatone|6news|nationwide|9news|qanda).* StoryStart: (THE SIDEBAR CONTENT ENDS|--TEXT TABLE--) StoryEnd: (--AUDIO . VIDEO GOES HERE--|© 1999 RTÉ News) # allow the >More... image to work ImageURL: http://www.rte.ie/news/navimages/more.gif # remove all table entries for this site. TableRender: flatten
volta_netgains.site:
URL: http://www.voltapublishing.com/cgi-bin/news/index.cgi?f=keyword&keywords=Ireland_shorts Name: Volta NetGains Description: high-quality daily news round-up of Internet issues and digital media Levels: 1 StoryStart: <P><B>Search our news archives.</B></P> StoryEnd: </HTML> StoryDiff: 1
jerusalem_post.site:
URL: http://www.jpost.com/servlet/Satellite?pagename=JPost%2FP%2FFrontPage%2FFrontPage&cid=1002116796299 Name: JPost Description: The Jerusalem Post Levels: 3 SizeLimit: 2000 AuthorName: David Resnick # You've got to import your cookies for this to work RequireCookie: www.jpost.com Aly ######### # Issue # ######### IssueUseTableSmarts: 0 IssueLinksStart: SECTIONS IssueLinksEnd: SectionIndex&cid=1006953080001 IssueCachable: 0 ############ # Contents # ############ ContentsUseTableSmarts: 0 ContentsCachable: 0 ContentsStart: CLASS="lead"> ContentsEnd: VALIGN="TOP"><!--OMKT--> # Add sections here, but make sure to increase the IssueLinksEnd above # Today's Paper contents ContentsURL: .*pagename=JPost/P/Section/SectionIndex&cid=1006688055060.* # editorial contents ContentsURL: .*pagename=JPost/P/Opinion/SectionIndex&cid=1006953079865.* # columns contents ContentsURL: .*pagename=JPost/P/Section/SectionIndex&cid=1006953079897.* # latest news contents ContentsURL: .*pagename=JPost/P/Section/SectionIndex&cid=1008596981749.* # Arts & Leisure contents ContentsURL: .*pagename=JPost/P/Section/SectionIndex&cid=1006953079917.* # Features contents ContentsURL: .*pagename=JPost/P/Section/SectionIndex&cid=1006953079845.* ######### # Story # ######### StoryURL: .*pagename=JPost/JPArticle.* StoryUseTableSmarts: 0 StoryCachable: 0 # StoryStart: <span CLASS="topstory"> # something wrong with the story start check, used to work with the above format StoryStart: "topstory" StoryEnd: Printer Friendly StoryHeadline: topstory(.*)span
haaretz.site:
URL: http://www.haaretz.com/ Name: Haaretz Description: Haaretz.com Levels: 3 SizeLimit: 5000 AuthorName: David Resnick ######### # Issue # ######### IssueUseTableSmarts: 0 IssueLinksStart: class="t13BNew" IssueLinksEnd: subContrassID=6 IssueCachable: 0 ############ # Contents # ############ ContentsUseTableSmarts: 0 ContentsCachable: 0 ContentsStart: class="t18BBordo" ContentsEnd: Top.gif # News contents ContentsURL: .*subContrassID=1.* # Business ContentsURL: .*subContrassID=2.* # editorial contents ContentsURL: .*subContrassID=4.* # Features contents ContentsURL: .*subContrassID=5.* ######### # Story # ######### StoryURL: .*hasen/spages.* StoryUseTableSmarts: 0 StoryCachable: 0 StoryStart: class="t18B" StoryEnd: HTTP-EQUIV="PRAGMA"
jpost-columns.site:
URL: http://www.jpost.com/servlet/Satellite?pagename=JPost/P/Section/IndexPhoto&cid=1006953079897 Name: JPost-columns Description: The Jerusalem Post Levels: 2 SizeLimit: 150 AuthorName: David Resnick # You've got to import your cookies for this to work RequireCookie: www.jpost.com Aly ############ # Contents # ############ ContentsStart: <B>Columns</B> ContentsEnd: IST</P> ContentsHTMLPreProcess: { s!onClick="this.href=FCx\(this.href\);"!!gis; } ######### # Story # ######### StoryURL: .*pagename=JPost/JPArticle.* StoryStart: <TD VALIGN="TOP" CLASS="byline"> StoryEnd: <TD CLASS="sect"> StoryFollowLinks: 1
jpost-international.site:
URL: http://www.jpost.com/servlet/Satellite?pagename=JPost/P/Section/IndexPhoto&cid=1078113566627 Name: JPost-international Description: The Jerusalem Post Levels: 2 SizeLimit: 150 AuthorName: David Resnick # You've got to import your cookies for this to work RequireCookie: www.jpost.com Aly ############ # Contents # ############ ContentsStart: <B>International</B> ContentsEnd: IST</P> ContentsHTMLPreProcess: { s!onClick="this.href=FCx\(this.href\);"!!gis; } ######### # Story # ######### StoryURL: .*pagename=JPost/JPArticle.* StoryStart: <TD VALIGN="TOP" CLASS="byline"> StoryEnd: <TD CLASS="sect"> StoryFollowLinks: 1 StoryHTMLPreProcess: { s!<SPAN CLASS="byline"></SPAN>.*<SPAN CLASS="lead">!!gis; s!<P CLASS="bottomline" ALIGN="CENTER">Advertisement<BR>!!gis; }
jpost-israel.site:
URL: http://www.jpost.com/servlet/Satellite?pagename=JPost/P/Section/IndexPhoto&cid=1078027574097 Name: JPost-Israel Description: The Jerusalem Post Levels: 2 SizeLimit: 150 AuthorName: David Resnick # You've got to import your cookies for this to work RequireCookie: www.jpost.com Aly ############ # Contents # ############ ContentsStart: <B>Israel</B> ContentsEnd: IST</P> ContentsHTMLPreProcess: { s!onClick="this.href=FCx\(this.href\);"!!gis; } ######### # Story # ######### StoryURL: .*pagename=JPost/JPArticle.* StoryStart: <TD VALIGN="TOP" CLASS="byline"> StoryEnd: <TD CLASS="sect"> StoryFollowLinks: 1 StoryHTMLPreProcess: { s!<SPAN CLASS="byline"></SPAN>.*<SPAN CLASS="lead">!!gis; s!<P CLASS="bottomline" ALIGN="CENTER">Advertisement<BR>!!gis; }
jpost-me.site:
URL: http://www.jpost.com/servlet/Satellite?pagename=JPost/P/Section/IndexPhoto&cid=1101615860782 Name: JPost-ME Description: The Jerusalem Post Levels: 2 SizeLimit: 150 AuthorName: David Resnick # You've got to import your cookies for this to work RequireCookie: www.jpost.com Aly ############ # Contents # ############ ContentsStart: <B>Middle East</B> ContentsEnd: IST</P> ContentsHTMLPreProcess: { s!onClick="this.href=FCx\(this.href\);"!!gis; } ######### # Story # ######### StoryURL: .*pagename=JPost/JPArticle.* StoryStart: <TD VALIGN="TOP" CLASS="byline"> StoryEnd: <TD CLASS="sect"> StoryFollowLinks: 1 StoryHTMLPreProcess: { s!<SPAN CLASS="byline"></SPAN>.*<SPAN CLASS="lead">!!gis; s!<P CLASS="bottomline" ALIGN="CENTER">Advertisement<BR>!!gis; }
jpost-opinion.site:
URL: http://www.jpost.com/servlet/Satellite?pagename=JPost/P/Opinion/SectionIndex&cid=1006953079865 Name: JPost-opinion Description: The Jerusalem Post Levels: 2 SizeLimit: 150 AuthorName: David Resnick # You've got to import your cookies for this to work RequireCookie: www.jpost.com Aly ############ # Contents # ############ ContentsStart: <B>Opinion</B> ContentsEnd: EDITOR'S PICKS ContentsHTMLPreProcess: { s!onClick="this.href=FCx\(this.href\);"!!gis; } ######### # Story # ######### StoryURL: .*pagename=JPost/JPArticle.* StoryStart: <TD VALIGN="TOP" CLASS="byline"> StoryEnd: <TD CLASS="sect"> StoryFollowLinks: 1 StoryHTMLPreProcess: { s!<SPAN CLASS="byline"></SPAN>.*<SPAN CLASS="lead">!!gis; s!<P CLASS="bottomline" ALIGN="CENTER">Advertisement<BR>!!gis; }
jp_japan_times_business.site:
# History: # 12/30/2001: Initial version. URL: http://www.japantimes.com/business.htm Name: Japan Times Business Description: English language news of Japan, "Business" section. Levels: 2 StoryURL: /cgi-bin/getarticle.pl.* ContentsStart: </CENTER><BR> ContentsEnd: <! bottombarstart> StoryStart: <! staticstart> StoryEnd: <! bottombarstart> ContentsHTMLPreProcess: { s/<\/font><\/a><br>\n\n/<\/a><br><br>/gmi; s/<\/?font[^>]*>//gmi; } StoryHTMLPreProcess: { s/<\/?font[^>]*>//gmi; } AuthorName: Michael Schubart AuthorEmail: michael@schubart.net
jp_japan_times_news.site:
# History: # 12/30/2001: Initial version. URL: http://www.japantimes.com/news.htm Name: Japan Times News Description: English language news of Japan, "News" section. Levels: 2 StoryURL: /cgi-bin/getarticle.pl.* ContentsStart: </CENTER><BR> ContentsEnd: <! bottombarstart> StoryStart: <! staticstart> StoryEnd: <! bottombarstart> ContentsHTMLPreProcess: { s/<\/font><\/a><br>\n\n/<\/a><br><br>/gmi; s/<\/?font[^>]*>//gmi; } StoryHTMLPreProcess: { s/<\/?font[^>]*>//gmi; } AuthorName: Michael Schubart AuthorEmail: michael@schubart.net
jp_daily_yomiuri_english.site:
# History: # 12/31/2001: Initial version. URL: http://www.yomiuri.co.jp/main/main-e.htm Name: Daily Yomiuri English Description: English language news of Japan Levels: 2 StoryURL: /newse/.* ContentsStart: Latest news as of: ContentsEnd: <!-- Language Labo --> ContentsHTMLPreProcess: { s/<img[^>]*alt="([^"]*)"[^>]*>/<b>$1<\/b>/gmi; s/ target = "main"//gmi; s/<\/?ul>//gmi; s/<li>/<br>/gmi; s/<\/?font[^>]*>//gmi; } StoryHTMLPreProcess: { s/<\/?font[^>]*>//gmi; } AuthorName: Michael Schubart AuthorEmail: michael@schubart.net
ny_post.site:
URL: http://www.nypost.com/avantgo/ Name: New York Post Levels: 3
christchurch_press.site:
# Christchurch Press Site converted to doc format using sitescooper # # URL: http://www.stuff.co.nz/inl/index/0,1008,0a1561,FF.html Name: Christchurch Press Levels: 2 ContentsStart: All the material on this page has the protection of international copyright. All rights reserved ContentsEnd: TOP OF PAGE StoryStart: All the material on this page has the protection of international copyright. All rights reserved StoryEnd: TOP OF PAGE StoryURL: http://www.stuff.co.nz/inl/index/.*\.html
gist_tv.site:
# gist_tv.site # For gist.com TV Listings # # To customize these listings (the "uid" in the URL below is for my settings), set up a # normal account at gist.com, set up the "Handheld Gist" feature on the main page, # do any setup necessary for the handheld version (the service seems a little shaky right now, # it may take some work). # # When you get to the step where you are supposed to click "submit" to install the # AvantGo channel, just view the source of the page you're looking at instead and look # for the uid number in the source near the submit button. Replace it below, and you're all # set. Put the URL in your browser to make sure you've got it set up right. # # Levels: 3 because that's what the gist AvantGo .subs file indicated. Not sure why. # AuthorName: Justin Henry <jhenry@fjicl.com> URL: http://avantgo.gist.com/tv/avantgo/index.jsp?uid=541624 Name: GIST TV Listings Levels: 3 ImageURL: .*
whyytv12.site:
URL: http://www.whyy.org/pda/index.html Name: WHYY Philadelphia TV12/91FM Levels: 3 ContentsPrint: 1 IssuePrint: 1 ImageURL: http://.* # # This site was converted from an AvantGo .subs file by subs-to-site.pl. # See http://sitescooper.org/ for more information on sitescooper.
ctc-movies-metro.site:
URL: http://www.clickthecity.com/movies/metrolist.asp Name: ClickTheCity.com - Metro Manila Movie Guide Levels: 3 AuthorName: Barry Dexter A. Gonzaga AuthorEMail: barryg-sitescooper /at/ kssp.upd.edu.ph ContentsURL: /movies/movie.asp.movid=.* StoryURL: /movies/theater.asp.theid=.* IssueLinksStart: Movie Guide Home IssueLinksEnd: active.macromedia.com ContentsStart: Movie Guide Home ContentsEnd: active.macromedia.com StoryStart: Movie Guide Home StoryEnd: active.macromedia.com
inq7.site:
URL: http://news.inq7.net/express/html_output/ Name: INQ7 Express Description: The Philippine Daily Inquirer and GMA Network News Web site Levels: 2 AuthorName: Barry Dexter A. Gonzaga AuthorEMail: barryg-sitescooper /at/ kssp.upd.edu.ph StoryURL: /express/html_output/.* StoryURL: http://money\.inq7\.net/topstories/printable_topstories\.php.* ImageURL: /express/html_output/.*\.gif ImageURL: http://www\.inq7money\.net/images/header/.*\.gif ContentsStart: class="mainContent"> ContentsEnd: <!-- Creative for 120x600 format --> StoryStart: class="mainContent"> StoryEnd: <!-- Creative for 120x600 format -->
seattle_p_i.site:
# Site file contributed by Jason Simpson <jason /at/ xio.com> # URL: http://seattlepi.nwsource.com/pitogo/ Name: Seattle P-I Levels: 2 # ContentsStart: </center> # ContentsEnd: </body> StoryURL: \S+\.shtml # StoryStart: </center> # StoryEnd: </body>
elmundo_culture.site:
# Author: Sergi Pusó <sergi /at/ iagora.net> URL: http://www.elmundo.es/diario/cultura/index.html Name: El Mundo Cultura Description: Culture news from spanish newspaper El Mundo Levels: 2 StoryURL: http://www.elmundo.es/diario/cultura/.*.html StoryStart: <!--FIN CABECERA--> StoryEnd: <!--TABLA VOTACION-->
elmundo_economy.site:
# Author: Sergi Pusó <sergi /at/ iagora.net> URL: http://www.elmundo.es/diario/economia/index.html Name: El Mundo Economia Description: Economy news from spanish newspaper El Mundo Levels: 2 StoryURL: http://www.elmundo.es/diario/economia/.*.html StoryStart: <!--FIN CABECERA--> StoryEnd: <!--TABLA VOTACION-->
elmundo_europe.site:
# Author: Sergi Pusó <sergi /at/ iagora.net> URL: http://www.elmundo.es/diario/europa/index.html Name: El Mundo Europa Description: Europe news from spanish newspaper El Mundo Levels: 2 StoryURL: http://www.elmundo.es/diario/europa/.*.html StoryStart: <!--FIN CABECERA--> StoryEnd: <!--TABLA VOTACION-->
elmundo_international.site:
# Author: Sergi Pusó <sergi /at/ iagora.net> URL: http://www.elmundo.es/diario/internacional/index.html Name: El Mundo Internacional Description: International news from spanish newspaper El Mundo Levels: 2 StoryURL: http://www.elmundo.es/diario/internacional/.*.html StoryStart: <!--FIN CABECERA--> StoryEnd: <!--TABLA VOTACION-->
elmundo_national.site:
# Author: Sergi Pusó <sergi /at/ iagora.net> URL: http://www.elmundo.es/diario/espana/index.html Name: El Mundo Nacional Description: Spain news from spanish newspaper El Mundo Levels: 2 StoryURL: http://www.elmundo.es/diario/espana/.*.html StoryStart: <!--FIN CABECERA--> StoryEnd: <!--TABLA VOTACION-->
elmundo_society.site:
# Author: Sergi Pusó <sergi /at/ iagora.net> URL: http://www.elmundo.es/diario/sociedad/index.html Name: El Mundo Sociedad Description: Society news from spanish newspaper El Mundo Levels: 2 StoryURL: http://www.elmundo.es/diario/sociedad/.*.html StoryStart: <!--FIN CABECERA--> StoryEnd: <!--TABLA VOTACION-->
elmundo_sports.site:
# Author: Sergi Pusó <sergi /at/ iagora.net> URL: http://www.elmundo.es/diario/deportes/index.html Name: El Mundo Deportes Description: Sports news from spanish newspaper El Mundo Levels: 2 StoryURL: http://www.elmundo.es/diario/deportes/.*.html StoryStart: <!--FIN CABECERA--> StoryEnd: <!--TABLA VOTACION-->
le_temps.site:
# script sitescooper pour le site du Temps, quotidien suisse # édité à Genève # écrit par Vincent Oberson (reverso@club-internet.fr) # avec les précieux conseils de Jacques Turbé et Pierre-Yves Letournel URL: http://www.letemps.ch/template/default.asp?page=sommaire Name: Le Temps Levels: 2 StoryURL: http://www.letemps.ch/template/.* ContentsStart: >: < ContentsEnd: <a name="débat"> StoryStart: document.frmPrint.submit() StoryEnd: La Une
globe_and_mail_columnists.site:
# The Globe and Mail is a general interest newspaper # based in Toronto, Canada. # # This script scoops the Stories presented in # the paper's National Columnists section. URL: http://www.globeandmail.com/generated/hubs/current/nationalColumnists.html Name: G&M Columnists Levels: 2 ContentsStart: <!-- /fragments/nav/HubNav_National.html ends --> ContentsEnd: Complete Index of Today's Print Headlines</b></font></a> # StoryURL: http://www\.globeandmail\.com/servlet/\S*&hub=national StoryURL: http://www\.globeandmail\.com/servlet\S*hub=national\S* StoryStart: <!-- Full Story Header --> StoryEnd: <!-- Full Story Footer --> # This story processor slows things down a lot, but # it removes the annoying text "PRINT EDITION" that # appears above every story. StoryPostProcess: { s{^<.*><b>PRINT EDITION</b><.*>$}{}mg; }
globe_and_mail_national.site:
# The Globe and Mail is a general interest newspaper # based in Toronto, Canada. # # This script scoops the National news stories # presented on the paper's homepage. URL: http://www.globeandmail.com/national/ Name: G&M National Levels: 2 ContentsStart: <!-- /fragments/nav/HubNav_National.html ends --> ContentsEnd: <b>Additional National Stories</b> # Use the following if you want to include the "Additional National # Stories" at the bottom of the page: # # ContentsEnd: <!-- /fragments/completeheadlineindex.html begins --> # StoryURL: http://www\.globeandmail\.com/servlet/\S*&hub=national StoryURL: http://www\.globeandmail\.com/servlet\S*hub=national\S* StoryStart: <!-- Full Story Header --> StoryEnd: <!-- Full Story Footer --> # This story processor slows things down a lot, but # it removes the annoying text "PRINT EDITION" that # appears above every story. StoryPostProcess: { s{^<.*><b>PRINT EDITION</b><.*>$}{}mg; }
globe_and_mail_thearts.site:
# The Globe and Mail is a general interest newspaper # based in Toronto, Canada. # # This script scoops the stories presented in # the paper's "The Arts" section. URL: http://www.globeandmail.com/thearts/ Name: G&M The Arts Levels: 2 ContentsStart: <!-- /fragments/nav/HubNav_TheArts.html ends --> ContentsEnd: <!-- /fragments/completeheadlineindex.html begins --> # StoryURL: http://www\.globeandmail\.com/servlet/\S*&hub=national StoryURL: http://www\.globeandmail\.com/servlet\S*hub=thearts\S* StoryStart: <!-- Full Story Header --> StoryEnd: <!-- Full Story Footer --> # This story processor slows things down a lot, but # it removes the annoying text "PRINT EDITION" that # appears above every story. StoryPostProcess: { s{^<.*><b>PRINT EDITION</b><.*>$}{}mg; }
globe_and_mail_toronto.site:
# The Globe and Mail is a general interest newspaper # based in Toronto, Canada. # # This script scoops the Stories presented in # the paper's National Toronto section. URL: http://www.globeandmail.com/generated/hubs/current/nationalToronto.html Name: G&M Toronto Levels: 2 ContentsStart: <!-- /fragments/nav/HubNav_National.html ends --> ContentsEnd: Complete Index of Today's Print Headlines</b></font></a> # StoryURL: http://www\.globeandmail\.com/servlet/\S*&hub=national StoryURL: http://www\.globeandmail\.com/servlet\S*hub=national\S* StoryStart: <!-- Full Story Header --> StoryEnd: <!-- Full Story Footer --> # This story processor slows things down a lot, but # it removes the annoying text "PRINT EDITION" that # appears above every story. StoryPostProcess: { s{^<.*><b>PRINT EDITION</b><.*>$}{}mg; }
bbc_news_front.site:
URL: http://news.bbc.co.uk/text_only.stm Name: BBC Front Page Levels: 2
bbc_news_health.site:
URL: http://news.bbc.co.uk/low/english/health/default.htm Name: BBC News Health Levels: 2
bbc_news_sci_tech.site:
URL: http://news.bbc.co.uk/low/english/sci/tech/default.htm Name: BBC News Sci-Tech Levels: 2 ContentsDiff: 1
bbc_news_world.site:
URL: http://news.bbc.co.uk/low/english/world/default.htm Name: BBC World News Levels: 2
the_guardian.site:
# The UK Guardian # Site file for Sitescooper (http://jmason.org/software/sitescooper/) # Written by: Jason Yanowitz <yanowitz /at/ poboxes.com> # Last updated: Oct 2 2001 jm URL: http://www.guardian.co.uk/guardian/todays_stories/ Name: UK Guardian Levels: 2 StoryURL: http://www.guardian.co.uk/.*/story/.* StoryStart: <.-- Navbar: no scribbling --><.-- Vignette StoryServer 4 [^>]+ --> StoryEnd: riangle_up.gif StoryHeadline: Guardian Unlimited \| The Guardian \| (.*)
gabriels_mobile_channel.site:
# Gabriel's Mobile Channel # Weekly Roman Catholic lectionary readings, daily meditations, and # religious news # Site file by Joe Pfeiffer, pfeiffer /at/ cs.nmsu.edu # URL: http://www.erienet.net/~stjoseph/ppcchannel/gabehomepp.html Name: Gabriels Channel Levels: 3 IssueUseTableSmarts: 0 IssueLinksStart: (<BODY>|<body>) IssueLinksEnd: (</BODY>|</body>) ContentsStart: (<BODY>|<body>) ContentsEnd: (</BODY>|</body>) # ContentsPrint: 1 StoryDiff: 1
scifiwire.site:
# Author: MMiller /at/ media-general.com (thanks!) URL: http://www.scifi.com/scifiwire/handheld.html Name: SciFi Wire Levels: 2 StoryDiff: 1
archaeology_org.site:
URL: http://www.archaeology.org/online/news/index.html Name: Archaeology Org News Levels: 2 ContentsStart: <!--Begin main table--> ContentsEnd: <!-- End news--> StoryURL: http://.*
explorezone.site:
# URL thanks to http://members.bellatlantic.net/~blumax/plink.html URL: http://explorezone.space.com/go/ Name: ExploreZone Levels: 2 StoryURL: http://explorezone.space.com/go/.* ImageURL: http://explorezone.space.com/go/.* StoryDiff: 1 StoryCacheable: 1
grahamhancock.site:
URL: http://www.grahamhancock.com/news/index.php Name: Hancock Levels: 2 ContentsStart: <div class=newsItem id=newsItem> ContentsEnd: <a href="/news/index.php?archive=1" target="_top" title="News Desk Archive" class="text">News desk archive...</a> StoryURL: http://.*
new_scientist.site:
URL: http://www.newscientist.com/inprint/ Name: New Scientist Levels: 2 AddURL: http://www.newscientist.com/news/ StoryURL: http://www.newscientist.com/\S+/\S+_\d+\.htm.* StoryURL: http://www.newscientist.com/\S+/\S+\.jsp\?id=\S+ ContentsStart: (magazine contents|Index Table) ContentsEnd: More than 1500 science, technology and academic vacancies StoryStart: <b class="(?:newsarthead|letterhead|heading|bktitle|intvwhead)"> StoryEnd: For exclusive insights into the most important developments in StoryHeadline: <b class="(?:newsarthead|letterhead|heading|bktitle|intvwhead)">(.*?)</b>
new_scientist_news.site:
# New Scientist News in RSS format URL: http://www.newscientist.com/feed.ns;jsessionid=HOGGBFOGNOAA?index=online-news Name: New Scientist News ContentsFormat: rss ContentsDiff: 1 StoryToPrintableSub: s,(id=.+),$1\&print=true, StoryToPrintableSub: s,\&feedId=.*,\&print=true, StoryStart: <div id="printbody"> StoryEnd: <div class="artlinks">
science_daily.site:
URL: http://www.sciencedaily.com/news/summaries.htm AuthorName: Derek Glidden <dglidden /at/ illusionary.com> Name: Science Daily Headlines Levels: 2 StoryURL: http://www.sciencedaily.com/releases/.* StoryStart: NEXT StoryEnd: RELATED
smithsonian.site:
URL: http://www.smithsonianmag.si.edu/smithsonian/toccurrent.shtml Name: Smithsonian Description: Smithsonian Magazine Levels: 2 StoryURL: http://www.smithsonianmag.si.edu/smithsonian/.*
spaceref.site:
URL: http://www.spaceref.com/avantgo/ AuthorName: Derek Glidden <dglidden /at/ illusionary.com> Name: SpaceRef.com Levels: 3 ContentsURL: http://www.spaceref.com/avantgo/(srnews|pressr|events).html ContentsPrint: 1 StoryURL: http://www.spaceref.com/avantgo/viewnews.html?.* StoryURL: http://www.spaceref.com/avantgo/viewpr.html?.* StoryURL: http://www.spaceref.com/avantgo/calendar.html?.*
crypto_gram.site:
URL: http://www.schneier.com/crypto-gram-back.html Name: Crypto-Gram Levels: 2 ContentsStart: <!--* begin page content --*> ContentsEnd: <!--* end page content --*> StoryURL: /crypto-gram-[[YY]]([[MM]]|[[MM-1]]|[[MM-2]]|[[MM-3]])\.html StoryStart: <!--* begin page content --*> StoryEnd: <!--* end page content --*> # fixed by Derek Glidden <dglidden /at/ illusionary.com> # and Adrian Colley aecolley /at/ spamcopdotnet
cryptome.site:
URL: http://jya.com/crypto.htm Name: Cryptome Levels: 2 ContentsStart: <HTML> ContentsEnd: </HTML> StoryURL: http://cryptome.org/\S+\.html? ContentsDiff: 1
GSR_Appearance_Mods.site:
#GS Resources Appearance Mods Forum #You will appreciate this site more if you #are a diehard fan of Classic Suzuki Motorcycles URL: http://www.thegsresources.com/_forum/viewforum.php?f=8 Name: GSR Appearance Mods Description: Suzuki GS Resources Appearance Mods AuthorName: Delmer Wells -- delmer at delmer dot com Levels: 2 ContentsStart: nowrap="nowrap"> Last Post </th> ContentsEnd: <td class="catBottom" #The following strips out view count and last poster's name ContentsHTMLPreProcess: { s/<td class="row2".*?<td class="row1" width="100%"//gs; } StoryStart: Posted: StoryEnd: class= <span class="topictitle"><a
GSR_Bike.site:
#GS Resources Bike of the Month #You will appreciate this site more if you #are a diehard fan of Classic Suzuki Motorcycles URL: http://www.thegsresources.com/gs_photo.htm Name: GSR Bike of The Month Description: Suzuki GS Page StoryDiff: 1 ImageOnlySite: 1 ImageURL: http://www.thegsresources.com/images/monthly_photo/.*jpg ImageScaleToMaxWidth: 500 AuthorName: Delmer Wells Levels: 2
GSR_General_Disc.site:
#GS Resources General Discussion Forum #You will appreciate this site more if you #are a diehard fan of Classic Suzuki Motorcycles URL: http://www.thegsresources.com/_forum/viewforum.php?f=3 Name: GSR General Discussion Description: Suzuki GS Page AuthorName: Delmer Wells Levels: 2 # ContentsUseTableSmarts: 1 # TableRender: list #ContentsFormat: RSS ContentsStart: Announcement: ContentsEnd: <td class="catBottom" #gensmall"></span></td> ContentsHTMLPreProcess: { s/<td class="row2".*?<td class="row1" width="100%"//gs; } # name= "jumpbox" # StoryURL: http://www.thegsresources.com/_forum/viewtopic.php?t=\d+ StoryStart: Posted: StoryEnd: class= <span class="topictitle"><a # StoryHeadline: <META NAME="headline" CONTENT="(.*?)">
GSR_Owners.site:
URL: http://www.thegsresources.com/_forum/viewforum.php?f=4 Name: GSR Owners Description: Suzuki GS Page AuthorName: Delmer Wells AuthorEmail: delmer at delmer dot com Levels: 2 # ContentsUseTableSmarts: 1 # TableRender: list #ContentsFormat: RSS ContentsStart: nowrap="nowrap"> Last Post </th> ContentsEnd: <td class="catBottom" #gensmall"></span></td> #The following strips out view count and last poster's name ContentsHTMLPreProcess: { s/<td class="row2".*?<td class="row1" width="100%"//gs; } # name= "jumpbox" # StoryURL: http://www.thegsresources.com/_forum/viewtopic.php?t=\d+ StoryStart: Posted: StoryEnd: class= <span class="topictitle"><a # StoryHeadline: <META NAME="headline" CONTENT="(.*?)">
GSR_Performance_Mods.site:
URL: http://www.thegsresources.com/_forum/viewforum.php?f=9 Name: GSR Performance Mods Description: Suzuki GS Performance Mods AuthorName: Delmer Wells -- delmer at delmer dot com Levels: 2 ContentsStart: nowrap="nowrap"> Last Post </th> ContentsEnd: <td class="catBottom" #The following strips out view count and last poster's name ContentsHTMLPreProcess: { s/<td class="row2".*?<td class="row1" width="100%"//gs; } StoryStart: Posted: StoryEnd: class= <span class="topictitle"><a
GSR_Stories.site:
URL: http://www.thegsresources.com/_forum/viewforum.php?f=5 Name: GSR Stories Forum Description: Suzuki GS Page AuthorName: Delmer Wells Levels: 2 # ContentsUseTableSmarts: 1 # TableRender: list #ContentsFormat: RSS ContentsStart: nowrap="nowrap"> Last Post </th> ContentsEnd: <td class="catBottom" #gensmall"></span></td> #The following strips out view count and last poster's name ContentsHTMLPreProcess: { s/<td class="row2".*?<td class="row1" width="100%"//gs; } # name= "jumpbox" # StoryURL: http://www.thegsresources.com/_forum/viewtopic.php?t=\d+ StoryStart: Posted: StoryEnd: class= <span class="topictitle"><a # StoryHeadline: <META NAME="headline" CONTENT="(.*?)">
GSR_Technical.site:
URL: http://www.thegsresources.com/_forum/viewforum.php?f=7 Name: GSR Technical Forum Description: Suzuki GS Page AuthorName: Delmer Wells Levels: 2 # ContentsUseTableSmarts: 1 # TableRender: list #ContentsFormat: RSS ContentsStart: nowrap="nowrap"> Last Post </th> ContentsEnd: <td class="catBottom" #gensmall"></span></td> #The following strips out view count and last poster's name ContentsHTMLPreProcess: { s/<td class="row2".*?<td class="row1" width="100%"//gs; } # name= "jumpbox" # StoryURL: http://www.thegsresources.com/_forum/viewtopic.php?t=\d+ StoryStart: Posted: StoryEnd: class= <span class="topictitle"><a # StoryHeadline: <META NAME="headline" CONTENT="(.*?)">
GSR_Tips-n-Tricks.site:
URL: http://www.thegsresources.com/_forum/viewforum.php?f=11 Name: GSR Tips & Tricks Description: Suzuki GS Page AuthorName: Delmer Wells Levels: 2 # ContentsUseTableSmarts: 1 # TableRender: list #ContentsFormat: RSS ContentsStart: nowrap="nowrap"> Last Post </th> ContentsEnd: <td class="catBottom" #gensmall"></span></td> #The following strips out view count and last poster's name ContentsHTMLPreProcess: { s/<td class="row2".*?<td class="row1" width="100%"//gs; } # name= "jumpbox" # StoryURL: http://www.thegsresources.com/_forum/viewtopic.php?t=\d+ StoryStart: Posted: StoryEnd: class= <span class="topictitle"><a # StoryHeadline: <META NAME="headline" CONTENT="(.*?)">
cnn_sports.site:
# CNN Sports URL: http://wireless.cnn.com/avantgo/CNNSI/en/ # created from PODS file by David A. Desrosiers AuthorName: Marko Bozikovic <marko.bozikovic /at/ envox.hr> Name: CNN Sports Levels: 2 ImageURL: .*\.gif ImageScaleToMaxWidth: 150 ContentsCachable: 0 StoryURL: http://wireless.cnn.com/avantgo/CNNSI/.* StoryCachable: 1
mobilebikes.site:
URL: http://www.mobilebikes.net/mobile/home.htm Name: MobileBikes Levels: 4 AuthorName: Barry Dexter A. Gonzaga AuthorEMail: barryg /at/ kssp.upd.edu.ph StoryURL: /mobile/.*\.htm ImageURL: /images/.*\.gif
yahoo_sport_news.site:
# Yahoo- Top stories page site grabber # Written by: Saunders, Richard <risaunde@usa.capgemini.com> URL: http://dailynews.yahoo.com/h/sp/nm/ Name: Yahoo! Sports News # There are two levels: the news index followed by each story. Levels: 2 # There are lots of links off the page, but not all are stories. StoryURL: http://dailynews.yahoo.com/h/nm/\d+/sp/.*\.html # Yahoo provides great comments to stop sitescooper from breaking! StoryStart: !-- TextStart -- StoryEnd: !-- TextEnd --
anandtech.site:
URL: http://www.anandtech.com/webnews.html Name: AnandTech Levels: 1 StoryStart: <a name="TopNews"> StoryEnd: Copyright © \d+-\d+ AnandTech, Inc. All rights UseTableSmarts: 0
ars_technica.site:
URL: http://arstechnica.com/index.html Name: Ars Technica StoryStart: <STRONG><SMALL>From the News Desk</SMALL></STRONG> StoryEnd: </html> StoryDiff: 1
computer_world.site:
URL: http://www.computerworld.com/news/weekinprint Name: ComputerWorld Description: ComputerWorld AuthorName: Goh Boon Nam # ComputerWorld - US # Note - PDB output filesize typically around 80KB # Version 1.0 # Date updated : 22 Aug 2003 # Changes for 1.0 : 1st time creation Levels: 2 ContentsStart: Center cell primary content ContentsEnd: -- secondary -- StoryToPrintableSub: s!http://www.computerworld.com/.*/story/0,(.*?),!http://www.computerworld.com/printthis/2003/0,4814,! StoryURL: http://www.computerworld.com/printthis/.* StoryStart: -- Begin top component of story -- StoryEnd: -- End body content -- StoryHTMLPreProcess: { s/align="right"//gim; s/align="center"//gim; s/align=right//gim; s/align=center//gim; s/&nbsp;//gim; }
firstmonday.site:
URL: http://firstmonday.org/issues/current_issue/ Name: First Monday Description: a peer-reviewed journal on the internet Levels: 2 TableRender: flatten StoryURL: http://firstmonday.org/issues/issue\S+/\S+/(index.html|) StoryURL: http://firstmonday.org/issues/current_issue/\S+/(index.html|) ImageURL: .*/img/.*\.gif ImageScaleToMaxWidth: 150 AuthorName: Dwight D. McKay and Justin Mason
infoworld.site:
URL: http://www.infoworld.com/togo/main.html Name: InfoWorld to Go Levels: 3 ContentsPrint: 1 IssuePrint: 1 ImageURL: http://.* ContentsURL: http://www.infoworld.com/togo/.* StoryURL: http://www.infoworld.com/togo/.* # # This site was converted from an AvantGo .subs file by subs-to-site.pl. # See http://sitescooper.org/ for more information on sitescooper.
joelonsoftware.site:
URL: http://www.joelonsoftware.com/ Name: Joel on Software Levels: 2 # ContentsStart: NEW: # ContentsEnd: TAKE ACTION StoryURL: http://www.joelonsoftware.com/articles/.* ContentsDiff: 1 # by Akkana Peck <akkana /at/ shallowsky.com>
newsforge.site:
URL: http://www.newsforge.com Name: NewsForge Levels: 2 ContentsStart: <div class="article_box"> ContentsDiff: 1 StoryStart: <div class="article_box"> StoryURL: http://.*\.newsforge\.com/.* ContentsHTMLPreProcess: { s/<B>//gm; }
oreillynet_features.site:
URL: http://www.oreillynet.com/features/ Name: O'ReillyNet Features Description: Features from across the O'Reilly Network Levels: 2 ContentsStart: -- weekly package -- ContentsEnd: -- top five -- # ContentsDiff: 1 StoryURL: http://www.oreillynet.com/pub/a/\S+/\d+/([[MM]]|[[MM-1]])/\d+/\S+.html StoryURL: http://www.oreillynet.com/pub/a/\S+/\d+/([[MM]]|[[MM-1]])/\d+/\S+.html.page=\d+ StoryStart: -- content here -- StoryEnd: -- sponsor column -- StoryFollowLinks: 1
os_opinion.site:
URL: http://www.osopinion.com/ Name: OS Opinion Levels: 2 ContentsStart: <TABLE width="100%" border="0" cellspacing="5" ContentsEnd: <TD width="3%" valign="top" height="3587" bgcolor="#FFFFFF" align="center"> StoryURL: /Opinions/.*\.html StoryStart: .BeginEditable .Content%20image%20area. StoryEnd: .EndEditable StoryFollowLinks: 1 StoryHeadline: <.-- .BeginEditable .doctitle. -->(.*?)<.-- .EndEditable -->
pcmag_images.site:
URL: http://www.pcmag.com/current_issue/ Name: PCMagazine-BiWed Description: PCMagazine AuthorName: Goh Boon Nam # PC Magazine with Images # Note - PDB output filesize typically more than 500KB # Version 1.4 # Date updated : 8 Jun 2005 # Changes for 1.5 : Changes to cater to new ContentsEnd & StoryEnd # Also to cater to ill-formed img tags in PCMag html # and to take in slide show photos Levels: 2 ContentsStart: BEGIN MAIN TABLE ContentsEnd: <!-- include file="display_homepage_line_break.asp" --> StoryURL: http://www.pcmag.com/article2/.* StoryURL: http://www.pcmag.com/slideshow/.* ImageURL: http://common.ziffdavisinternet.com/util_get_image/\d+.* ImageURL: http://www.pcmag.com/images/(.*?)dot.gif StoryStart: BEGIN MAIN TABLE StoryEnd: (OpenSaveArticleWindow|<div class="slideshow_caption">) StoryHTMLPreProcess: { s/align="right"//gim; s/align="center"//gim; s/align=right//gim; s/align=center//gim; s/<div id="oc_header">(.*?)<\/div>//gis; s/<div id="online_classified">(.*?)<\/div>//gis; s/<div class="article_price_container">(.*?)<\/div>//gis; s/<table width="100%" cellpadding="0" cellspacing="0" border="0" class="Article_Header_Table">(.*?)<\/table>//gis; s/<table width="100%" cellpadding="0" cellspacing="0" border="0" class="PG_Header_Table">(.*?)<\/table>//gis; s/<br\/>//gim; s/<div id="Premium_Partners_Container">(.*?)<\/div>//gis; s/>All Shots<\/a>/>next ><\/a>/gim; s/jpg"\/>/jpg">/gim; s/gif"\/>/gif">/gim; } # Notes # ----- # s/>All Shots<\/a>/>next ><\/a>/gim; -> trick sitescooper to follow link to All Shots page # s/jpg"\/>/jpg">/gim; -> clean up malformed img tag - eg. in All Shots page # s/gif"\/>/gif">/gim; -> clean up malformed img tag - eg. in All Shots page
risks.site:
URL: http://catless.ncl.ac.uk/go/risks/latest Name: comp.risks Levels: 2 StoryURL: http://catless.ncl.ac.uk/go/risks/\d+/\d+/\d+ MinPages: 2 StoryHeadline: <b>(.+?)</b>
slashdot_top.site:
# # Slashdot, top level only, no stories. # URL: http://slashdot.org/index.pl?simpledesign=1&lowbandwidth=1 Name: SlashDot Top Description: News for Nerds, Stuff that Matters Levels: 1 # In a single-level scoop, sitescooper uses StoryStart/StoryEnd # even for the first page. StoryStart: <div class="article"> StoryEnd: <div class="btmnav">
slyck.site:
URL: http://www.slyck.com Name: Slyck Levels: 2 ContentsStart: <img src=pics/slyck_news.gif> ContentsEnd: <img src=pics/new_rel.gif usemap=#nrmap border=0> StoryURL: http://.*
techdirt.site:
URL: http://www.techdirt.com/search.pl?query=&topic=&section=&author= Name: TechDirt StoryURL: /(articles|fotr)/\d+/\S+html StoryStart: <TABLE width=99% cellpadding=0 cellspacing=0 border=0 StoryEnd: This site was built on ContentsStart: <SELECT name=author> ContentsEnd: This site was built on ContentsDiff: 1 Levels: 2 MinPages: 2
the_register.site:
# Modified to include less unnecessary text, bold titles, ... # by Peter Marschall, Version 1.1, 3.11.2000 URL: http://www.theregister.co.uk/ Name: The Register Levels: 2 # ContentsStart: <div id=\"Index\"> ContentsStart: <h2>Headlines</h2> ContentsDiff: 1 ContentsUseTableSmarts: 0 StoryURL: http://www.theregister.co.uk/.* # StoryHeadline: <DIV CLASS="storyhead">(.*?)</DIV> StoryStart: <div class=\"IconsTop\"> StoryCacheable: 1 # This probably isn't relevant any more: #StoryHTMLPreProcess: { # s/<DIV CLASS=.storyhead.>(.*?)<\/DIV>/<H2 CLASS='storyhead'>$1<\/H2>/is; # s/<br>.<br><B>Related (?:[sS]tory|[sS]tories|[lL]ink|[lL]inks)<\/B>.*\Z//s; # s/<br>+/<br>/i; # s/<br><p>(?:<br>)*/<p>/i; #} #MinPages: 2
wiredmag.site:
URL: http://www.wired.com/ # Also check out: http://www.wired.com/news_drop/palmpilot/ Name: Wired Description: Wired Magazine Levels: 2 ContentsStart: <div id="mainStories"> StoryURL: http://www.wired.com/.* StoryStart: <div class="storyTxt">
xmlhack.site:
URL: http://xmlhack.com/dlist.php?date=[[MM]]-[[YYYY]] Name: XMLHack Description: Developer news from the XML community Levels: 2 ContentsStart: <BODY ContentsEnd: <TD VALIGN="TOP" WIDTH=241 ALIGN="RIGHT"> ContentsDiff: 1 StoryURL: /read.php.* StoryStart: <BODY StoryEnd: <TD VALIGN="TOP" WIDTH=241 ALIGN="RIGHT"> MinPages: 2
zzz.site:
# Sitescooper site file for ZZZ Online # Written by Alastair Rankine <arankine@avaya.com> # URL: http://zzz.com.ru/archive.html Name: ZZZ Online Description: Technology news Levels: 2 ContentsDiff: 1 StoryURL: http://zzz.com.ru/\d+.html ImageURL: http://zzz.com.ru/pic\d+.jpg ImageURL: http://zzz.com.ru/\d+num\d+.jpg ImageScaleToMaxWidth: 156 ContentsStart: <!-- main column // --> ContentsEnd: <!-- end of main column // --> StoryStart: <!-- main column // --> StoryEnd: <!-- end of main column // -->
paulgraham.site:
URL: http://paulgraham.com/articles.html Name: Paul Graham Levels: 2 StoryURL: http://paulgraham.com/.*.html ContentsDiff: 1
pcmag_firstlooks.site:
URL: http://www.pcmag.com/category2/0,1738,21,00.asp Name: PCMag-1stLooks Description: PCMagazine First Looks AuthorName: Goh Boon Nam # PC Magazine's First Looks Section with Images # Version 1.0 # Date updated : 14 Jun 2005 Levels: 2 ContentsStart: BEGIN MAIN TABLE ContentsEnd: <!-- include file="display_homepage_line_break.asp" --> StoryURL: http://www.pcmag.com/article2/.* StoryURL: http://www.pcmag.com/slideshow/.* StoryStart: BEGIN MAIN TABLE StoryEnd: (OpenSaveArticleWindow|<div class="slideshow_caption">) ImageURL: http://common.ziffdavisinternet.com/util_get_image/\d+.* ImageURL: http://www.pcmag.com/images/(.*?)dot.gif ContentsHTMLPreProcess: { s/align="right"//gim; s/align="center"//gim; s/align=right//gim; s/align=center//gim; s/<div id="oc_header">(.*?)<\/div>//gis; s/<div id="online_classified">(.*?)<\/div>//gis; s/<div class="article_price_container">(.*?)<\/div>//gis; s/<table width="100%" cellpadding="0" cellspacing="0" border="0" class="Article_Header_Table">(.*?)<\/table>//gis; s/<table width="100%" cellpadding="0" cellspacing="0" border="0" class="PG_Header_Table">(.*?)<\/table>//gis; s/<br\/>//gim; s/<div id="Premium_Partners_Container">(.*?)<\/div>//gis; s/>All Shots<\/a>/>next ><\/a>/gim; s/jpg"\/>/jpg">/gim; s/gif"\/>/gif">/gim; } StoryHTMLPreProcess: { s/align="right"//gim; s/align="center"//gim; s/align=right//gim; s/align=center//gim; s/<div id="oc_header">(.*?)<\/div>//gis; s/<div id="online_classified">(.*?)<\/div>//gis; s/<div class="article_price_container">(.*?)<\/div>//gis; s/<table width="100%" cellpadding="0" cellspacing="0" border="0" class="Article_Header_Table">(.*?)<\/table>//gis; s/<table width="100%" cellpadding="0" cellspacing="0" border="0" class="PG_Header_Table">(.*?)<\/table>//gis; s/<br\/>//gim; s/<div id="Premium_Partners_Container">(.*?)<\/div>//gis; s/>All Shots<\/a>/>next ><\/a>/gim; s/jpg"\/>/jpg">/gim; s/gif"\/>/gif">/gim; } # Notes # ----- # s/>All Shots<\/a>/>next ><\/a>/gim; -> trick sitescooper to follow link 
to All Shots page # s/jpg"\/>/jpg">/gim; -> clean up malformed img tag - eg. in All Shots page # s/gif"\/>/gif">/gim; -> clean up malformed img tag - eg. in All Shots page
tvguide.site:
URL: http://tvguide.com/palm/ Name: TVGEN Levels: 2 ContentsPrint: 1 StoryURL: http://.* ImageURL: http://.* # # This site was converted from an AvantGo .subs file by subs-to-site.pl. # See http://sitescooper.org/ for more information on sitescooper.
freshmeat_articles.site:
URL: http://freshmeat.net/articles/ Name: Freshmeat Articles Description: editorials and articles from Freshmeat, UNIX software listings site Levels: 2 AuthorName: jm ContentsStart: -- Content -- ContentsEnd: -- End of content -- StoryURL: http://freshmeat.net/articles/view/\d+/ StoryStart: -- Content -- StoryEnd: -- End of content --
rootprompt.site:
URL: http://www.rootprompt.org/rss/ Name: RootPrompt.org Description: Nothing but Unix Levels: 2 ContentsFormat: rss StoryStart: <table width=100% VALIGN=top cellpading=0 cellspacing=5 border=0><tr width=100% NOSAVE> StoryEnd: <a href=submit.php3>Suggest an article or news story</a> StoryURL: http://rootprompt.org/article.php3\?article=\d+
samba_traffic.site:
# thanks to Lim Swee Tat <st_lim@3ui.com> URL: http://kt.zork.net/samba/latest_print.html Name: Kernel Traffic - Samba Levels: 1 StoryStart: Table Of Contents StoryEnd: </html>
wine_traffic.site:
URL: http://kt.zork.net/wine/latest_print.html Name: KC - Wine Levels: 1 StoryStart: Table Of Contents StoryEnd: </HTML>
iwin.site:
URL: http://iwin2.nws.noaa.gov/iwin/textversion/ Name: IWIN Weather Levels: 3 IssueURL: http://iwin2.nws.noaa.gov/iwin/.* IssuePrint: 1 ContentsURL: http://iwin2.nws.noaa.gov/iwin/.* ContentsPrint: 1
nrcc_northeast_forecast.site:
URL: http://met-www.cit.cornell.edu/text/forecast.html Name: NRCC Forecasts for Northeastern US Levels: 3 ImageURL: http://.* ContentsURL: http://met-www.cit.cornell.edu/text/ ContentsPrint: 1 StoryURL: http://met-www.cit.cornell.edu/cgi-bin.*
wu_new_mexico.site:
# Current weather reports # To use (in the US), replace ``88005'' with your Zip code # Site file by Joe Pfeiffer, pfeiffer /at/ cs.nmsu.edu # Changed to use a layout by jm /at/ jmason.org # URL: http://www.wunderground.com/cgi-bin/findweather/getForecast?query=88005 Name: Weather - New Mexico
wu_redmond.site:
# Current weather reports # To use (in the US), adjust zip code. # Site file by Joe Pfeiffer, pfeiffer /at/ cs.nmsu.edu # Changed to use a layout by jm /at/ jmason.org # # Robb Canfield <robb@canfield.com> # * Switched to print... URL, it parses better # * Use my new Table reformatter URL: http://printer.wunderground.com/cgi-bin/findweather/getForecast?query=98059 StoryUseTableSmarts: 0 Name: Weather - Redmond TableRender: list
alertbox.site:
URL: http://www.useit.com/alertbox/ Name: Alertbox Levels: 2 ContentsStart: Current Column ContentsEnd: Previous Columns StoryURL: http://www.useit.com/alertbox/\d+\.html
jon_udell.site:
URL: http://udell.roninhouse.com/ Name: Jon Udells Articles Description: Articles by Jon Udell, Byte.com columnist Levels: 2 ContentsStart: <b>Tuning in to Jon's channel</b> ContentsEnd: This channel rendering courtesy of StoryURL: http://www.byte.com/column/BYT\S+ StoryURL: http://www.byte.com/printableArticle\?doc_id=BYT\S+ StoryToPrintableSub: s,/column/,/printableArticle?doc_id=, StoryStart: <font size="5"> StoryEnd: </HTML>
mappa_mundi.site:
URL: http://mappa.mundi.net/map/ss_index_issue.html Name: Mappa.Mundi Description: Revealing Invisible Worlds Levels: 2 # jm: man, this site has an excellent site map! Cheers guys ContentsStart: -- CURRENT ISSUE -- ContentsEnd: ====== FOOTER FOLLOWS ====== StoryURL: /(inform|locus|visions|reviews|maps|about)/.* StoryStart: ====== LEFT HAND NAVIGATION FOLLOWS ==== StoryEnd: <A HREF="/contact/">contact</A> TableRender: flatten
mozillazine.site:
URL: http://www.mozillazine.org/contents.rdf Name: MozillaZine Description: Your source for Mozilla news, advocacy, interviews, builds, and more! ContentsFormat: rss StoryURL: /talkback\.html\?article=\d+ # You may also want to add a StoryStart and StoryEnd line to # clean up the stories. Here's sample lines (you need to edit them): # StoryStart: --features-- StoryEnd: form method="post" action # (This is a sitescooper site file. see http://sitescooper.tsx.org/ # It was generated from the site's RSS by rss-to-site.pl 1.0.)
researchbuzz.site:
URL: http://www.researchbuzz.com/news/ Name: ResearchBuzz Description: covering the world of Internet research StoryDiff: 1 StoryStart: News: This Week StoryEnd: <SPAN class=body4>All original material on
searchenginereport.site:
URL: http://searchenginewatch.com/sereport/current.html Name: Search Engine Report Description: Danny Sullivan's monthly newsletter covering search engine developments AuthorName: jm Levels: 2 StoryURL: http://searchenginewatch.com/sereport/\d+/\d+.*\.html ContentsEnd: <form name="myForm"> StoryEnd: <form name="myForm">
bifurcated_rivets.site:
# Bifurcated Rivets by Lindsay Marshall # URL: http://catless.ncl.ac.uk/Lindsay/weblog/latest.html Name: Bifurcated Rivets StoryStart: <!--@@H1--> StoryEnd: <!--@@F1--> StoryDiff: 1
boingboing.site:
URL: http://www.boingboing.net/ Name: Boing Boing Description: Mark Frauenfelder's directory of wonderful things StoryEnd: -- BEGIN SPYONIT.COM SPYMAKER -- StoryDiff: 1
camworld.site:
URL: http://www.camworld.com/ Name: CamWorld Description: Random Thoughts, New Media, Web Design Levels: 1 StoryStart: Search CamWorld for: StoryEnd: Go to the entries for
crummy.site:
URL: http://www.crummy.com/ Name: Crummy Description: weblog by Leonard Richardson Levels: 1 StoryStart: <td halign=left width=50%> StoryEnd: <h4>Catch up on previously Bruised News StoryDiff: 1 # I really need to get into the habit of doing this. AuthorName: jm@jmason.org AuthorEMail: jm@jmason.org
doc_searls.site:
URL: http://doc.weblogs.com/ Name: Doc Searls Weblog Description: Doc Searls' weblog Levels: 1 StoryStart: -- End Image Map -- StoryEnd: <table class="hCalendarTable" cellspacing="0" border="0"> StoryDiff: 1 TableRender: flatten
eckes.site:
# From: Mela Eckenfels <mela AT darkover.inka.de> URL: http://www.eckes.org/modules.php?name=AvantGo Name: Eckes.org - Opinions of some Geeks Description: Thoughts about Tech, GeekStuff and the unsettling RealLife. Levels: 2 StoryURL: /.*sid\=.* StoryDiff: 1 ContentsPrint: 1 ContentsDiff: 1
ethel_the_blog.site:
URL: http://stommel.tamu.edu/~baum/ethel/blogger.html Name: Ethel The Blog Description: Observations on science, computers, books, music and other shiny things that catch my mind's eye. Levels: 1 StoryStart: create a gutter between the left margin and page content StoryEnd: <FONT SIZE=4>\s+LEISURE\s+</FONT> StoryDiff: 1 # argh, big fonts! Nein danke. StoryPostProcess: { s/<FONT size=\"?4\"?>/<font size=3>/gs; }
flutterby.site:
URL: http://www.flutterby.com/ Name: Flutterby Levels: 1 StoryEnd: Connectivity provided by StoryDiff: 1
genehack.site:
URL: http://www.genehack.org/ Name: GeneHack Levels: 1 StoryStart: /universal header StoryEnd: Unless otherwise noted, all rights reserved. StoryDiff: 1
hack_the_planet.site:
URL: http://wmf.editthispage.com/ Name: Hack The Planet Levels: 1 StoryStart: <a href="http://crit.org/http://crit.org/pub/cs.utexas.edu/wesf/">CritLink</a><br> StoryEnd: <td width="20"> </td> StoryDiff: 1
honeyguide.site:
URL: http://www.chaparraltree.com/honeyguide/ Name: Honeyguide Description: good science-oriented weblog Levels: 1 StoryStart: <p class=archmenu> StoryEnd: Copyright \d+-\d+ StoryDiff: 1
jason_pettus.site:
# site_samples/weblog/jason_pettus.site # # Jason Pettus, Chicago USA URL: http://www.geocities.com/jpettus.geo/ Name: Jason Pettus Levels: 1 AuthorName: Jan Lund Thomsen AuthorEmail: kwed@kwed.org StoryStart: <!-- Content start --> StoryEnd: <!-- Content end -->
memepool.site:
URL: http://www.memepool.com/ Name: Memepool Levels: 1 StoryDiff: 1 UseTableSmarts: 0
monkeyfist.site:
URL: http://www.monkeyfist.com/plain/ Name: Monkeyfist Description: The Monkeyfist Collective AuthorName: Justin Mason AuthorEMail: jm /at/ jmason.org Levels: 1 StoryDiff: 1
mydog.site:
# contributed by michael d. ivey <ivey /at/ gweezlebur.com> # URL: http://gweezlebur.com/~ivey/weblog/ Name: my dog wants to be on the radio Description: michael d. ivey's weblog Levels: 1 StoryStart: Begin Daily Section StoryEnd: <a href="/~ivey/index.shtml">home</a>
ntk.site:
URL: http://www.ntk.net/ Name: NTKnow StoryStart: <table StoryEnd: </table>
peterme.site:
# peterme.com -- "Providing meme therapy for your troubled thoughts" # URL: http://peterme.com/ Name: PeterMe StoryDiff: 1
rathergood.site:
URL: http://www.rathergood.com/ Name: rathergood.com Description: The Crab of Eternal Wisdom ponders the nature of existence AuthorName: Justin Mason AuthorEMail: jm /at/ jmason.org Levels: 1 StoryDiff: 1
rc3.site:
URL: http://www.rc3.org/ Name: RC3 Levels: 1 StoryStart: <body StoryEnd: This site designed by Rafe Colburn. Copyright StoryDiff: 1
riverbend.site:
# Baghdad Burning: Riverbend's Blog URL: http://riverbendblog.blogspot.com/ Name: Riverbend Description: Riverbend: Girl Blog from Iraq Levels: 1 StoryStart: id="Title">Baghdad Burning StoryDiff: 1
robot_wisdom.site:
URL: http://www.robotwisdom.com/ Name: Robot Wisdom Levels: 1 StoryStart: <b>Headlines:</b> StoryEnd: <b>WebLog Archives</b> StoryDiff: 1
scripting_news.site:
# Dave Winer's Scripting News URL: http://www.scripting.com/ Name: Scripting News Levels: 1 # this is inconvenient; there's no easy way to find start of text anymore. StoryStart: <table width="400" cellspacing="0" cellpadding="3" border="0"> StoryEnd: <b>Last update</b>: StoryDiff: 1
tim_oreilly.site:
URL: http://www.oreillynet.com/weblogs/tim/ Name: Tim O'Reilly's Weblog Levels: 1 StoryStart: -- *content here *-- StoryEnd: -- *sponsor column *-- StoryDiff: 1
tomalaks_realm.site:
# Tomalak's Realm URL: http://tr.pair.com/avantgo.html Name: Tomalaks Realm Levels: 1 StoryStart: <html> StoryEnd: </html> StoryDiff: 1
where_is_raed.site:
# Where is Raed? URL: http://www.dearraed.blogspot.com/ Name: WhereIsRaed Description: Where Is Raed Levels: 1 StoryStart: blogger code # ContentsDiff doesn't seem to work here, for some reason. ContentsDiff: 1 # Salam puts everything inside blockquote, which makes it come out # in a tiny central column on the palm. ContentsHTMLPreProcess: { s/<blockquote>//gim; s/<\/blockquote>//gim; }
kevin_sites.site:
# #This file captures images and text from the site. The pictures flesh out a lot of the #stories but the writing is good enough that they aren't really needed. I sync to a card #so file size isn't important. Comment out the last two lines for smaller files. # URL: http://www.kevinsites.net/ Name: Iraq War Blog - K. Sites Description: Kevin Sites' War Blog - Iraq AuthorName: Delmer Wells <delmer at delmer dot com> #Updated 11-4-04 Levels: 1 StoryDiff: 1 ImageURL: http://www.kevinsites.net/images/.* ImageScaleToMaxWidth: 500
(Scooped by sitescooper. Go back to the sitescooper page)