PHP robot detection

Started by mario, December 26, 2008, 11:50:23 PM

Previous topic - Next topic

mario

This is a nice script that watches for robots.

<?php
/**
 * Detect whether a request comes from a known robot/crawler.
 *
 * The user-agent string is compared, case-insensitively, against a fixed
 * list of substrings. The first substring found anywhere in the user agent
 * makes the function return true.
 *
 * @param string|null $user_agent User-agent string to inspect. When omitted
 *                                (or null), $_SERVER["HTTP_USER_AGENT"] is
 *                                used; if that header is missing the request
 *                                is treated as "not a robot".
 * @return bool true if the user agent contains one of the listed tokens,
 *              false otherwise.
 */
function is_robot($user_agent = null)
{
    // Substrings that identify a robot when present in the user agent.
    $robots = array(
        "Accoona-AI-Agent",
        "AOLspider",
        "BlackBerry",
        // NOTE(review): this entry looks like an e-mail-obfuscation
        // placeholder rather than a real user-agent token - confirm the
        // intended value against the original list.
        "[email protected]",
        "CazoodleBot",
        "CFNetwork",
        "ConveraCrawler",
        "Cynthia",
        "Dillo",
        "discoveryengine.com",
        "DoCoMo",
        "ee://aol/http",
        "exactseek.com",
        "fast.no",
        "FAST MetaWeb",
        "FavOrg",
        "FS-Web",
        "Gigabot",
        "GOFORITBOT",
        "gonzo",
        "Googlebot-Image",
        "holmes",
        "HTC_P4350",
        "HTML2JPG Blackbox",
        "http://www.uni-koblenz.de/~flocke/robot-info.txt",
        "iArchitect",
        "ia_archiver",
        "ICCrawler",
        "ichiro",
        "IEAutoDiscovery",
        "ilial",
        "IRLbot",
        "Keywen",
        "kkliihoihn nlkio",
        "larbin",
        "libcurl-agent",
        "libwww-perl",
        "Mediapartners-Google",
        "Metasearch Crawler",
        "Microsoft URL Control",
        "MJ12bot",
        "T-H-U-N-D-E-R-S-T-O-N-E",
        "voodoo-it",
        "www.aramamotorusearchengine.com",
        "archive.org_bot",
        "Teoma",
        "Ask Jeeves",
        "AvantGo",
        "Exabot-Images",
        "Exabot",
        "Google Keyword Tool",
        "Googlebot",
        "heritrix",
        "www.livedir.net",
        "iCab",
        "Interseek",
        "jobs.de",
        "pmoz.info",
        "SnapPreviewBot",
        "Slurp",
        "Danger hiptop",
        "MQBOT",
        "msnbot-media",
        "msnbot",
        "MSRBOT",
        "NetObjects Fusion",
        "nicebot",
        "nrsbot",
        "Ocelli",
        "Pagebull",
        "PEAR HTTP_Request class",
        "Pluggd/Nutch",
        "psbot",
        "Python-urllib",
        "Regiochannel",
        "SearchEngine",
        "Seekbot",
        "segelsuche.de",
        "Semager",
        "ShopWiki",
        "Snappy",
        "Speedy Spider",
        "sproose",
        "TurnitinBot",
        "Twiceler",
        "VB Project",
        "VisBot",
        "voyager",
        "VWBOT",
        "Wells Search",
        "West Wind",
        "Wget",
        "WWW-Mechanize",
        "www.show-tec.net",
        "xxyyzz",
        "yacybot",
        "Yahoo-MMCrawler",
        "yetibot",
    );

    // Fall back to the current request's header when no string was passed.
    if ($user_agent === null) {
        $user_agent = isset($_SERVER["HTTP_USER_AGENT"])
            ? $_SERVER["HTTP_USER_AGENT"]
            : "";
    }

    // A missing, empty or non-string user agent cannot match any token.
    if (!is_string($user_agent) || $user_agent === "") {
        return false;
    }

    foreach ($robots as $robot) {
        // Compare against false explicitly: a match at offset 0 is still a match.
        if (stripos($user_agent, $robot) !== false) {
            return true;
        }
    }

    return false;
}
?>