# robots.txt file used to exclude certain directories and files
# from being spidered by robots and spiders
#
# Created August 2000 for Lake Michigan College www.lakemichigancollege.edu
# by Alice Kim (kim@lakemichigancollege.edu)
#
# Change history:
#   24 Apr 2001             Exclude /lib/media.html file
#   5 Nov 2002              Exclude Google image robot, pix, style, ssi and
#                           postcard directories
#   1 May 2003  Alice Kim   Disallow libwww and LWP for email site crawlers
#   18 Jun 2003 Alice Kim   Disallow new ID lookup page and FAQ page indexing
#   29 Aug 2003 Alice Kim   Disallow pdf file indexing by all and by Googlebot
#   23 Oct 2003 Alice Kim   Disallow class-schedule directory indexing
#   23 Feb 2005 Alice Kim   Allow Alexa crawling (ia_archiver)
#
# Format notes:
#   - Each directive must be on its own line; a "#" turns the rest of the
#     line into a comment, so directives must never follow a "#" on one line.
#   - Groups (one or more User-agent lines plus their rules) are separated
#     by a blank line.
#   - User-agent: names a specific robot; "*" is for everyone else.
#   - Disallow: if the value matches the first part of the requested path,
#     the robot must not fetch it. Values must begin with "/".
#     An empty Disallow: value means nothing is disallowed.

# EmailSiphon address harvester: no access to any file.
User-agent: EmailSiphon
Disallow: /

# Googlebot image crawler: no access to any file.
User-agent: Googlebot-Image
Disallow: /

# Googlebot: do not index pdf files ("*" and "$" are pattern extensions).
# NOTE(review): a robot that matches a named group normally ignores the "*"
# group, so Googlebot is presumably NOT bound by the directory rules at the
# bottom of this file -- confirm whether those rules should be repeated here.
User-agent: Googlebot
Disallow: /*.pdf$

# secretbrowser crawler: no access to any file.
User-agent: secretbrowser/007
Disallow: /

# libwww perl requests: no access to any file.
User-agent: libwww
Disallow: /

# LWP::Simple perl script requests: no access to any file.
User-agent: LWP::Simple
Disallow: /

# grub-client crawler: no access to any file.
User-agent: grub-client
Disallow: /

# Alexa / Internet Archive crawler: allowed everywhere (empty Disallow).
User-agent: ia_archiver
Disallow:

# Rules for all robots not named above.
User-agent: *
# cgi-bin directory
Disallow: /cgi-bin/
# download directory
Disallow: /download/
# graphics directory
Disallow: /graphics/
# images directory
Disallow: /images/
# javascript directory
Disallow: /javascript/
# ssi directory
Disallow: /ssi/
# style directory
Disallow: /style/
# media.html file
Disallow: /lib/media.html
# pdf directory
Disallow: /pdf/
# postcard directory
Disallow: /postcard/
# test directory
Disallow: /test/
# Error pages and new-ID pages.
# NOTE(review): these four paths previously had no leading "/", which makes
# the rule ineffective. They are anchored at the site root here on the
# assumption that the pages live there -- confirm the actual locations.
# 404 page
Disallow: /404.html
# 403 page
Disallow: /403.html
# new id FAQ page
Disallow: /newid-faq.html
# new id lookup page
Disallow: /newid-lookup.html
# cp test directory
Disallow: /cp/
# SS pdf directory
Disallow: /ss/SS_pdfs/
# CBS forms directory
Disallow: /cbs/forms/
# forms directory
Disallow: /forms/
# class schedule directory
Disallow: /class-schedule/