#
# /robots.txt file for http://www.sentry.org/
#
# Format: one directive per line. Each group is a "User-agent:" line
# followed by its "Disallow:" rules; "Disallow: /" excludes the whole site.
#

# Robots to restrict (* is all)
User-agent: *
Disallow: /cgi-bin/
Disallow: /docs/
Disallow: /~trev/opus/173files/
Disallow: /~trev/opus/179files/
Disallow: /~trev/perl/
Disallow: /~adrienne/photoalbums/

# Turnitin plagiarism robot
User-agent: TurnitinBot
Disallow: /~trev/opus/

# Unknown
User-agent: NPBot
Disallow: /

# MSIE browser
User-agent: MSIECrawler
Disallow: /

# MSN robot
User-agent: msnbot
Disallow: /

# Microsoft
User-agent: msnbot-media
Disallow: /

# Microsoft
User-agent: msnbot-products
Disallow: /

# Microsoft
User-agent: msnbot-newsblogs
Disallow: /

# Picture Indexer http://www.picsearch.com/bot.html
User-agent: psbot
Disallow: /

# Japanese robot
User-agent: Baiduspider+
Disallow: /

# Telstra
User-agent: Sensis Web Crawler
Disallow: /

# Unknown robot
User-agent: appie
Disallow: /

# Italian robot
User-agent: Iltrovatore-Setaccio
Disallow: /

# Contextual robot - kuloko.com "coming soon"
User-agent: kuloko-bot
Disallow: /

# Google adwords bot
User-agent: Mediapartners-Google
Disallow: /

# Google image bot
User-agent: Googlebot-Image
Disallow: /

# Google mobile bot
User-agent: Googlebot-Mobile
Disallow: /

# Japanese image robot
User-agent: ImageBot
Disallow: /

# Japanese search engine
User-agent: NaverBot-1.0
Disallow: /

# Japanese search engine
User-agent: NaverBot_dloader
Disallow: /

# Polish robot
User-agent: Szukacz
Disallow: /

# http://www.gigablast.com/spider.html
User-agent: Gigabot
Disallow: /

# Russian? open source search engine
User-agent: DataparkSearch
Disallow: /

# Unknown
User-agent: QuepasaCreep
Disallow: /

# Unknown
User-agent: SurveyBot
Disallow: /

# Unknown
User-agent: NetResearchServer
Disallow: /

# Unknown
User-agent: Clustered-Search-Bot
Disallow: /

# Unknown
User-agent: Scrubby
Disallow: /

# Unknown
User-agent: VoilaBot
Disallow: /

# Unknown
User-agent: GeonaBot
Disallow: /

# Unknown
User-agent: Pompos
Disallow: /

# Chinese edu search engine
# http://www.openfind.com.tw/robot.html
User-agent: Openbot
Disallow: /

# Cluster search engine
User-agent: Clushbot
Disallow: /

# Yahoo ignoring /docs/ exclusion
# so disallow all
User-agent: Slurp
Disallow: /

# ??
User-agent: Jetbot
Disallow: /

# ??
User-agent: sohu-search
Disallow: /

# http://www.amfibi.com
User-agent: Amfibibot
Disallow: /

# www.updated.com
User-agent: updated
Disallow: /

# Yahoo image crawler
User-agent: Yahoo-MMCrawler
Disallow: /

# ???
User-agent: Knowledge.com
Disallow: /

# Russian robot: tankvit@e-mail.ru
User-agent: booch_1.0.7
Disallow: /

# http://www.WISEnutbot.com
User-agent: ZyBorg
Disallow: /

# www.seventwentyfour.com
User-agent: LinkWalker
Disallow: /

# http://www.become.com/webmasters.html
#
# http://www.become.com/site_owners.html
User-agent: BecomeBot
Disallow: /

# http://www.tutorgig.info
User-agent: TutorGigBot
Disallow: /

# ???
User-agent: Girafabot
Disallow: /

# ???
User-agent: Holmes
Disallow: /

# http://holmes.ge
User-agent: HolmesBot
Disallow: /

# ???
User-agent: webcrawl.net
Disallow: /

# ???
User-agent: GoForIt.com
Disallow: /

# http://www.globalspec.com/Ocelli
User-agent: Ocelli
Disallow: /

# ???
User-agent: Ultraseek
Disallow: /

# http://www.nutch.org/docs/en/bot.html
User-agent: NutchCVS
Disallow: /

# http://www.eliyon.com/NextGenSearchBot
User-agent: NextGenSearchBot
Disallow: /

# http://www.authoritativeweb.com/crawl
User-agent: ConveraCrawler
Disallow: /

# ???
User-agent: ScSpider
Disallow: /

# www.earthcom.info
User-agent: EARTHCOM.info
Disallow: /

# ???
User-agent: Zao-Crawler
Disallow: /

# http://www.omni-explorer.com Internet Categorizer
User-agent: OmniExplorer_Bot
Disallow: /

# http://www.aipbot.com
User-agent: aipbot
Disallow: /

# http://www.entireweb.com
User-agent: Speedy Spider
Disallow: /

# http://www.aberja.de
User-agent: Aberja Checkomat
Disallow: /

# http://irl.cs.tamu.edu/crawler
User-agent: IRLbot
Disallow: /

# http://wortschatz.uni-leipzig.de/nextlinks/findlinks.html
User-agent: findlinks
Disallow: /

# http://gossamer-threads.com/scripts/links/
User-agent: Links
Disallow: /

# ??
User-agent: MnoGoSearch
Disallow: /

# Ask Jeeves: http://sp.ask.com/docs/about/tech_crawling.html
User-agent: Teoma
Disallow: /

# ??
User-agent: wbdbot
Disallow: /

# Czech search engine
User-agent: ccubee
Disallow: /

# IBM
User-agent: http://www.almaden.ibm.com/cs/crawler
Disallow: /

# http://squigglebot.com
User-agent: SquigglebotBot
Disallow: /

# http://64.124.122.252/feedback.html
User-agent: RufusBot
Disallow: /

# Scam
User-agent: Scumbot
Disallow: /

# http://www.kosmix.com/html/crawler.html
User-agent: voyager
Disallow: /

# www.ansearch.com.au
User-agent: AnsearchBot
Disallow: /

# www.local.com
User-agent: LocalcomBot
Disallow: /

# http://www.cazoodle.com
User-agent: CazoodleBot
Disallow: /

# http://www.sitesell.com/sbider.html
User-agent: SBIder
Disallow: /

# http://www.exabot.com/go/robot
User-agent: Exabot
Disallow: /

# Accoona-AI-Agent/1.1.2 (aicrawler at accoonabot dot com)
User-agent: Accoona-AI-Agent
Disallow: /

# http://kc.nict.go.jp/icc/crawl.html
User-agent: LC-Crawler
Disallow: /

# http://kc.nict.go.jp/icc/crawl.html
User-agent: ICC-Crawler
Disallow: /

# http://www.sogou.com/docs/help/webmasters.htm#07
User-agent: sogou web spider
Disallow: /

# http://www.boitho.com/dcbot.html
User-agent: boitho.com-dc
Disallow: /

# http://www.envolk.com/envolkspiderinfo.html
User-agent: envolk
Disallow: /

# http://www.webalta.net/ru/about_webmaster.html
User-agent: WebAlta Crawler
Disallow: /

# RedBot/redbot-1.0 (Rediff.com Crawler; redbot at rediff dot com)
User-agent: RedBot
Disallow: /

# Unknown
User-agent: Yandex
Disallow: /

# http://www.yodao.com/help/webmaster/spider/
User-agent: YodaoBot-Image
Disallow: /

# http://www.yodao.com/help/webmaster/spider/
User-agent: YodaoBot
Disallow: /

# http://www.cuill.com/twiceler/robot.html
User-agent: Twiceler
Disallow: /

# http://szukaj.onet.pl
User-agent: holmes
Disallow: /

# http://szukaj.onet.pl
User-agent: OnetSzukaj
Disallow: /

# ????
User-agent: StackRambler
Disallow: /

# http://search.msn.com/msnbot.htm
User-agent: MSNBOT_Mobile
Disallow: /

# http://www.setooz.com/oozbot.html
User-agent: OOZBOT
Disallow: /

# ????
User-agent: RAYSPIDER
Disallow: /

# http://www.exooba.com/; info at exooba dot com
User-agent: exooba
Disallow: /

# Metaspinner; http://www.meta-spinner.de/
User-agent: Metaspinner
Disallow: /

# Beta, mailto:gue@cis.uni-muenchen.de
User-agent: wwwster
Disallow: /

# http://www.majestic12.co.uk/bot.php?+
User-agent: MJ12bot
Disallow: /

# ????
User-agent: Jakarta
Disallow: /

# http://nlp.uned.es/qeavis/
User-agent: QEAVis
Disallow: /

# http://www.scoutjet.com/
User-agent: ScoutJet
Disallow: /

# bot@bot.bot
User-agent: bot
Disallow: /

# ???
User-agent: obot
Disallow: /

# www.openacoon.de
User-agent: OpenAcoon
Disallow: /

# ???
User-agent: Mail.Ru
Disallow: /

# http://www.commoncrawl.org/bot.html
User-agent: CCBot
Disallow: /

# http://www.shopsalad.com/
User-agent: SaladSpoon
Disallow: /

#
# /robots.txt file for http://shadow.sentry.org/
#

# Robots to restrict (* is all)
User-agent: *
Disallow: /cgi-bin/

# Unknown
User-agent: NPBot
Disallow: /

# MSIE browser
User-agent: MSIECrawler
Disallow: /

# MSN robot
User-agent: msnbot
Disallow: /

# Microsoft
User-agent: msnbot-media
Disallow: /

# Picture Indexer http://www.picsearch.com/bot.html
User-agent: psbot
Disallow: /

# Japanese robot
User-agent: Baiduspider+
Disallow: /

# Telstra
User-agent: Sensis Web Crawler
Disallow: /

# Unknown robot
User-agent: appie
Disallow: /

# Italian robot
User-agent: Iltrovatore-Setaccio
Disallow: /

# Contextual robot - kuloko.com "coming soon"
User-agent: kuloko-bot
Disallow: /

# Google adwords bot
User-agent: Mediapartners-Google
Disallow: /

# Google image bot
User-agent: Googlebot-Image
Disallow: /

# Japanese image robot
User-agent: ImageBot
Disallow: /

# Japanese search engine
User-agent: NaverBot-1.0
Disallow: /

# Japanese search engine
User-agent: NaverBot_dloader
Disallow: /

# Polish robot
User-agent: Szukacz
Disallow: /

# Unknown
User-agent: Gigabot
Disallow: /

# Russian? open source search engine
User-agent: DataparkSearch
Disallow: /

# Unknown
User-agent: QuepasaCreep
Disallow: /

# Unknown
User-agent: SurveyBot
Disallow: /

# Unknown
User-agent: NetResearchServer
Disallow: /

# Unknown
User-agent: Clustered-Search-Bot
Disallow: /

# Unknown
User-agent: Scrubby
Disallow: /

# Unknown
User-agent: VoilaBot
Disallow: /

# Unknown
User-agent: GeonaBot
Disallow: /

# Unknown
User-agent: Pompos
Disallow: /

# Chinese edu search engine
# http://www.openfind.com.tw/robot.html
User-agent: Openbot
Disallow: /

# Cluster search engine
User-agent: Clushbot
Disallow: /

# Yahoo ignoring /docs/ exclusion
# so disallow all
User-agent: Slurp
Disallow: /

# ??
User-agent: Jetbot
Disallow: /

# ??
User-agent: sohu-search
Disallow: /

# http://www.amfibi.com
User-agent: Amfibibot
Disallow: /

# www.updated.com
User-agent: updated
Disallow: /

# ???
User-agent: StackRambler
Disallow: /

# Yahoo image crawler
User-agent: Yahoo-MMCrawler
Disallow: /

# ???
User-agent: Knowledge.com
Disallow: /

# Russian robot: tankvit@e-mail.ru
User-agent: booch_1.0.7
Disallow: /

# http://www.WISEnutbot.com
User-agent: ZyBorg
Disallow: /

# www.seventwentyfour.com
User-agent: LinkWalker
Disallow: /

# http://www.become.com/webmasters.html
#
# http://www.become.com/site_owners.html
User-agent: BecomeBot
Disallow: /

# http://www.tutorgig.info
User-agent: TutorGigBot
Disallow: /

# ???
User-agent: Girafabot
Disallow: /

# ???
User-agent: Holmes
Disallow: /

# ???
User-agent: webcrawl.net
Disallow: /

# ???
User-agent: GoForIt.com
Disallow: /

# http://www.globalspec.com/Ocelli
User-agent: Ocelli
Disallow: /

# ???
User-agent: Ultraseek
Disallow: /

# http://www.nutch.org/docs/en/bot.html
User-agent: NutchCVS
Disallow: /

# http://www.eliyon.com/NextGenSearchBot
User-agent: NextGenSearchBot
Disallow: /

# http://www.authoritativeweb.com/crawl
User-agent: ConveraCrawler
Disallow: /

# ???
User-agent: ScSpider
Disallow: /

# www.earthcom.info
User-agent: EARTHCOM.info
Disallow: /

# http://www.majestic12.co.uk/projects/dsearch/mj12bot.php
User-agent: MJ12bot
Disallow: /

# ???
User-agent: Zao-Crawler
Disallow: /

# http://www.omni-explorer.com Internet Categorizer
User-agent: OmniExplorer_Bot
Disallow: /

# http://www.aipbot.com
User-agent: aipbot
Disallow: /

# http://www.entireweb.com
User-agent: Speedy Spider
Disallow: /

# http://www.aberja.de
User-agent: Aberja Checkomat
Disallow: /

# http://irl.cs.tamu.edu/crawler
User-agent: IRLbot
Disallow: /

# http://wortschatz.uni-leipzig.de/nextlinks/findlinks.html
User-agent: findlinks
Disallow: /

# http://gossamer-threads.com/scripts/links/
User-agent: Links 2.0
Disallow: /

# ??
User-agent: MnoGoSearch
Disallow: /

# Ask Jeeves: http://sp.ask.com/docs/about/tech_crawling.html
User-agent: Teoma
Disallow: /

# ??
User-agent: boitho.com-dc
Disallow: /

# ??
User-agent: wbdbot
Disallow: /

# Czech search engine
User-agent: ccubee
Disallow: /

# IBM
User-agent: http://www.almaden.ibm.com/cs/crawler
Disallow: /

# http://szukaj.onet.pl
User-agent: OnetSzukaj
Disallow: /

# http://squigglebot.com
User-agent: SquigglebotBot
Disallow: /

# http://64.124.122.252/feedback.html
User-agent: RufusBot
Disallow: /

# Scam
User-agent: Scumbot
Disallow: /

# http://www.boitho.com/dcbot.html
User-agent: boitho.com-dc
Disallow: /

# ????
User-agent: voyager
Disallow: /

# www.ansearch.com.au
User-agent: AnsearchBot
Disallow: /

# www.local.com
User-agent: LocalcomBot
Disallow: /

# http://www.cazoodle.com
User-agent: CazoodleBot
Disallow: /

# http://www.sitesell.com/sbider.html
User-agent: SBIder
Disallow: /

# http://www.exabot.com/go/robot
User-agent: Exabot
Disallow: /

# Accoona-AI-Agent/1.1.2 (aicrawler at accoonabot dot com)
User-agent: Accoona-AI-Agent
Disallow: /

# http://kc.nict.go.jp/icc/crawl.html
User-agent: LC-Crawler
Disallow: /

# http://kc.nict.go.jp/icc/crawl.html
User-agent: ICC-Crawler
Disallow: /

# http://www.boitho.com/dcbot.html
User-agent: SearchDaimon.com-dc
Disallow: /

# http://www.sogou.com/docs/help/webmasters.htm#07
User-agent: sogou web spider
Disallow: /

# http://www.boitho.com/dcbot.html
User-agent: boitho.com-dc
Disallow: /

# http://www.envolk.com/envolkspiderinfo.html
User-agent: envolk
Disallow: /

# http://www.webalta.net/ru/about_webmaster.html
User-agent: WebAlta Crawler
Disallow: /

# RedBot/redbot-1.0 (Rediff.com Crawler; redbot at rediff dot com)
User-agent: RedBot
Disallow: /

# Unknown
User-agent: Yandex
Disallow: /

# http://www.yodao.com/help/webmaster/spider/
User-agent: YodaoBot-Image
Disallow: /

# http://www.cuill.com/twiceler/robot.html
User-agent: Twiceler
Disallow: /

# http://szukaj.onet.pl
User-agent: holmes
Disallow: /

# http://szukaj.onet.pl
User-agent: OnetSzukaj
Disallow: /

# ????
User-agent: StackRambler
Disallow: /

# http://search.msn.com/msnbot.htm
User-agent: MSNBOT_Mobile
Disallow: /

# http://www.setooz.com/oozbot.html
User-agent: OOZBOT
Disallow: /

# ????
User-agent: RAYSPIDER
Disallow: /

# http://www.exooba.com/; info at exooba dot com
User-agent: exooba
Disallow: /

# Metaspinner; http://www.meta-spinner.de/
User-agent: Metaspinner
Disallow: /

# Beta, mailto:gue@cis.uni-muenchen.de
User-agent: wwwster
Disallow: /

# http://www.majestic12.co.uk/bot.php?+
User-agent: MJ12bot
Disallow: /

# ????
User-agent: Jakarta
Disallow: /

# http://nlp.uned.es/qeavis/
User-agent: QEAVis
Disallow: /

# http://www.scoutjet.com/
User-agent: ScoutJet
Disallow: /

# bot@bot.bot
User-agent: bot
Disallow: /

# ???
User-agent: obot
Disallow: /

# www.openacoon.de
User-agent: OpenAcoon
Disallow: /

# ???
User-agent: Mail.Ru
Disallow: /

# http://www.commoncrawl.org/bot.html
User-agent: CCBot
Disallow: /

# http://www.shopsalad.com/
User-agent: SaladSpoon
Disallow: /

# Further robots excluded from the whole site (no per-robot notes).
#
User-agent: Apexoo
Disallow: /
#
User-agent: Balihoo
Disallow: /
#
User-agent: Bigsearch.ca
Disallow: /
#
User-agent: BobCrawl
Disallow: /
#
User-agent: CCBot
Disallow: /
#
User-agent: CamontSpider
Disallow: /
#
User-agent: CatchBot
Disallow: /
#
User-agent: CazoodleBot
Disallow: /
#
User-agent: Comodo-Certificates-Spider
Disallow: /
#
User-agent: ConveraCrawler
Disallow: /
#
User-agent: DiBot
Disallow: /
#
User-agent: DoCoMo
Disallow: /
#
User-agent: DotBot
Disallow: /
#
User-agent: Exabot
Disallow: /
#
User-agent: FAST
Disallow: /
#
User-agent: Gaisbot
Disallow: /
#
User-agent: Gigabot
Disallow: /
#
User-agent: GingerCrawler
Disallow: /
#
User-agent: Googlebot-Image
Disallow: /
#
User-agent: Grub
Disallow: /
#
User-agent: GurujiBot
Disallow: /
#
User-agent: GurujiImageBot
Disallow: /
#
User-agent: ICC-Crawler
Disallow: /
#
User-agent: IRLbot
Disallow: /
#
User-agent: IlseBot
Disallow: /
#
User-agent: Inar_spider
Disallow: /
#
User-agent: Intelix
Disallow: /
#
User-agent: Isara
Disallow: /
#
User-agent: Isidorus
Disallow: /
#
User-agent: Jakarta
Disallow: /
#
User-agent: Java
Disallow: /
#
User-agent: JobSpider_BA
Disallow: /
#
User-agent: KnowItAll
Disallow: /
#
User-agent: LWP::Simple
Disallow: /
#
User-agent: LarbinWebCrawler
Disallow: /
#
User-agent: LijitSpider
Disallow: /
#
User-agent: Linguee
Disallow: /
#
User-agent: MJ12bot
Disallow: /
#
User-agent: MLBot
Disallow: /
#
User-agent: MSIE
Disallow: /
#
User-agent: MSR-ISRCCrawler
Disallow: /
#
User-agent: MSRBOT
Disallow: /
#
User-agent: Mail.Ru
Disallow: /
#
User-agent: Mosilla+
Disallow: /
#
User-agent: Mozilla+4.0
Disallow: /
#
User-agent: Mozilla
Disallow: /
#
User-agent: NaverBot-1.0
Disallow: /
#
User-agent: NextGenSearchBot
Disallow: /
#
User-agent: Nokia6680
Disallow: /
#
User-agent: Nokia6682
Disallow: /
#
User-agent: Nusearch
Disallow: /
#
User-agent: Nutch
Disallow: /
#
User-agent: NutchCVS
Disallow: /
#
User-agent: Ocelli
Disallow: /
#
User-agent: OutfoxBot
Disallow: /
#
User-agent: PRCrawler
Disallow: /
#
User-agent: Pete-Spider
Disallow: /
#
User-agent: Python-urllib
Disallow: /
#
User-agent: QEAVis
Disallow: /
#
User-agent: REAP-crawler
Disallow: /
#
User-agent: RedBot
Disallow: /
#
User-agent: RufusBot
Disallow: /
#
User-agent: SAMSUNG-SGH-E250
Disallow: /
#
User-agent: SBIder
Disallow: /
#
User-agent: SaladSpoon
Disallow: /
#
User-agent: SapphireWebCrawler
Disallow: /
#
User-agent: ScSpider
Disallow: /
#
User-agent: Semager
Disallow: /
#
User-agent: SeznamBot
Disallow: /
#
User-agent: SheenBot
Disallow: /
#
User-agent: Shelob
Disallow: /
#
User-agent: Shim-Crawler
Disallow: /
#
User-agent: SimilarPages
Disallow: /
#
User-agent: SiteSucker
Disallow: /
#
User-agent: Snapbot
Disallow: /
#
User-agent: Sogou
Disallow: /
# http://help.soso.com/webspider.htm
User-agent: Sosospider+
Disallow: /
#
User-agent: Speedy
Disallow: /
#
User-agent: StackRambler
Disallow: /
#
User-agent: Steeler
Disallow: /
#
User-agent: SurveyBot
Disallow: /
#
User-agent: Teemer
Disallow: /
#
User-agent: TurnitinBot
Disallow: /
#
User-agent: Twiceler
Disallow: /
#
User-agent: VisBot
Disallow: /
#
User-agent: Voracious
Disallow: /
#
User-agent: WebAlta
Disallow: /
#
User-agent: WebarooBot
Disallow: /
#
User-agent: Wget
Disallow: /
#
User-agent: Y!J-SRD
Disallow: /
#
User-agent: Yahoo-MMAudVid
Disallow: /
#
User-agent: Yandex
Disallow: /
#
User-agent: Yanga
Disallow: /
#
User-agent: Yeti
Disallow: /
#
User-agent: YodaoBot
Disallow: /
#
User-agent: YowedoBot
Disallow: /
#
User-agent: al_viewer
Disallow: /
#
User-agent: all_web2
Disallow: /
#
User-agent: appie
Disallow: /
#
User-agent: ayioncrawl
Disallow: /
#
User-agent: bitlybot
Disallow: /
#
User-agent: blackcrawl
Disallow: /
#
User-agent: boitho.com-dc
Disallow: /
#
User-agent: bot
Disallow: /
#
User-agent: complex_network_group
Disallow: /
#
User-agent: curl
Disallow: /
#
User-agent: del.icio.us-thumbnails
Disallow: /
#
User-agent: disco
Disallow: /
#
User-agent: es_com_viewer
Disallow: /
#
User-agent: es_org_web
Disallow: /
#
User-agent: exooba
Disallow: /
#
User-agent: findlinks
Disallow: /
#
User-agent: gemiusbot
Disallow: /
#
User-agent: holmes
Disallow: /
#
User-agent: i1searchbot
Disallow: /
#
User-agent: ichiro
Disallow: /
#
User-agent: ilial
Disallow: /
#
User-agent: kalooga
Disallow: /
#
User-agent: larbin_2.6.3
Disallow: /
#
User-agent: li_viewer
Disallow: /
#
User-agent: librabot
Disallow: /
#
User-agent: localSystem
Disallow: /
#
User-agent: lwp-trivial
Disallow: /
#
User-agent: msnbot-Products
Disallow: /
#
User-agent: msnbot-media
Disallow: /
#
User-agent: msnbot
Disallow: /
#
User-agent: multicrawler
Disallow: /
#
User-agent: my-robot
Disallow: /
#
User-agent: nokia6610I
Disallow: /
#
User-agent: noxtrumbot
Disallow: /
#
User-agent: nutch-solr
Disallow: /
#
User-agent: nutch.biz
Disallow: /
#
User-agent: nutch.us
Disallow: /
#
User-agent: nutch
Disallow: /
#
User-agent: nutchwax.com
Disallow: /
#
User-agent: oBot
Disallow: /
#
User-agent: panscient.com
Disallow: /
#
User-agent: psbot
Disallow: /
#
User-agent: pulseBot
Disallow: /
#
User-agent: robotgenius
Disallow: /
#
User-agent: ru_org_viewer
Disallow: /
#
User-agent: sdcresearchlabs-testbot
Disallow: /
#
User-agent: silk
Disallow: /
#
User-agent: sproose
Disallow: /
#
User-agent: taptubot
Disallow: /
#
User-agent: thumbshots-de-bot
Disallow: /
#
User-agent: toofaan
Disallow: /
#
User-agent: voyager
Disallow: /
#
User-agent: woriobot
Disallow: /
#
User-agent: wwwster
Disallow: /
#
User-agent: yacy
Disallow: /
#
User-agent: yggdrasil
Disallow: /
#
User-agent: yodaoice
Disallow: /
#
User-agent: yoofind
Disallow: /