# Note:
# There are really misbehaved spiders out there. If you're
# irresponsible, your access may be blocked using other means
# than specified in the robots exclusion standard.
# UHH/RRZ/HRF

# Crawler for hire
User-agent: 008
User-agent: 360Spider
Disallow: /

# A #
User-agent: abot
User-agent: AdsBot-Google
User-agent: AdsTxtCrawler
User-agent: adstxtlab.com Crawler
User-agent: AhrefsBot
User-agent: aibot
User-agent: aiHitBot
User-agent: aipbot
User-agent: anbo-ubot
Disallow: /

# B #
User-agent: b2w/0.1
User-agent: Barkrowler
User-agent: BLEXBot
User-agent: BUbiNG
Disallow: /

# C #
User-agent: CCBot
User-agent: ccubee
User-agent: CherryPicker
User-agent: Cliqzbot
## ??
User-agent: com.plumanalytics
User-agent: Companybook-Crawler
User-agent: Contacts-Crawler
User-agent: ConveraCrawler
User-agent: ConveraMultiMediaCrawler
User-agent: CopyRightCheck
User-agent: Crescent
Disallow: /

# D #
User-agent: Dataprovider
User-agent: DOC
User-agent: DomainCrawler
# DotBot allowed
User-agent: Download Ninja
Disallow: /

# E #
User-agent: EmailCollector
User-agent: EmailSiphon
User-agent: EmailWolf
User-agent: Ernst
User-agent: ExtractorPro
Disallow: /

# F #
User-agent: Fetch
Disallow: /

# G #
User-agent: GoKi
User-agent: grub
# has been *very* poorly behaved.
User-agent: grub-client
Disallow: /

# H #
User-agent: http://www.almaden.ibm.com/cs/crawler
User-agent: HTTrack
Disallow: /

# I #
User-agent: iCCrawler
User-agent: ImplisenseBot
Disallow: /

# J #
User-agent: Jersey
User-agent: JobboerseBot
User-agent: Jakarta Commons-HttpClient
Disallow: /

# K #
# Doesn't follow robots.txt, but anyway...
User-agent: k2spider
Disallow: /

# L #
User-agent: larbin
User-agent: libwww
User-agent: libwww-perl
User-agent: linkdexbot
User-agent: linko
User-agent: linkwalker
User-agent: looksmart
User-agent: ltx71
Disallow: /

# M #
User-agent: magpie-crawler
User-agent: Mail.Ru
Disallow: /

User-agent: MauiBot
Disallow: /

User-agent: Mediapartners-Google*
User-agent: Microsoft.URL.Control
User-agent: MJ12bot
User-agent: MS FrontPage
User-agent: MSIECrawler
Disallow: /

# N #
User-agent: NetMechanic
# Crawl rate too high. http://www.nameprotect.com/botinfo.html
User-agent: NPBot
Disallow: /

# O #
User-agent: obot
User-agent: oBot
User-agent: Offline Explorer
User-agent: OmniExplorer_Bot
User-agent: OnPageBot
Disallow: /

# P #
User-agent: Pcore-HTTP
User-agent: peopleman
User-agent: PhpDig
User-agent: PiplBot
User-agent: ProWebWalker
User-agent: psbot
User-agent: Python-urllib
Disallow: /

# Q #
User-agent: QualidatorWebSiteAnalyzer
User-agent: QualidatorBot
Disallow: /

# R #
User-agent: rapidminer-web-mining-extension-crawler
User-agent: rogerbot
Disallow: /

# S #
User-agent: SalesIntelligent
User-agent: SBIder
User-agent: ScoutJet
User-agent: Scumbot
User-agent: SemrushBot
User-agent: SemrushBot-SA
User-agent: SEOkicks-Robot
User-agent: seoscanners.net
User-agent: siteBot
User-agent: sitecheck.internetseer.com
User-agent: SiteSnagger
User-agent: SMTBot
User-agent: Sosospider
User-agent: Sogou web spider
User-agent: spbot
User-agent: Sqworm
User-agent: Stratagems Kumo
User-agent: SurdotlyBot
Disallow: /

# T #
User-agent: Teleport
User-agent: TeleportPro
User-agent: trendictionbot
Disallow: /

# U #
User-agent: UbiCrawler
User-agent: Uptimebot
User-agent: URL_Spider_Pro
Disallow: /

# V #
User-agent: VEDENSBOT
Disallow: /

# W #
User-agent: WBSearchBot
User-agent: WebAuto
User-agent: WebCopier
User-agent: WebBandit
# capture bot http://www.webreaper.net/
User-agent: WebReaper
User-agent: WebSauger
User-agent: WebStripper
User-agent: WebZIP
# in recursive mode a frequent problem.
User-agent: wget
User-agent: woobot
User-agent: Wotbox
Disallow: /

# X #
User-agent: Xenu
Disallow: /

# Y #

# Z #
User-agent: Zao
User-agent: Zealbot
User-agent: ZyBORG
Disallow: /

# contains rules from:
# - http://cpr.uni-rostock.de/robots.txt

User-agent: *
Disallow: /deposit
Disallow: /login
Disallow: /search
Disallow: /api
Disallow: /admin/
Crawl-delay: 10