<?php
// Fetch the site's robots.txt as an array of lines (false/empty on failure).
function get_robots_txt($url)
{
    $parsed_url = parse_url($url);
    // Use the URL's own scheme so an https site's robots.txt is fetched over https.
    $scheme = isset($parsed_url['scheme']) ? $parsed_url['scheme'] : 'http';
    $robotstxt = @file("{$scheme}://{$parsed_url['host']}/robots.txt");
    return $robotstxt;
}
// Check the Disallow rules in robots.txt against the path of $url.
// Note: this simplified parser ignores User-agent lines, so every Disallow
// rule in the file is applied regardless of which agent it targets.
function robots_allowed_crawl($url, $robotstxt)
{
    $parsed_url = parse_url($url);
    $path = isset($parsed_url['path']) ? $parsed_url['path'] : '/';

    // No robots.txt (or an unreadable one) means crawling is allowed.
    if (empty($robotstxt)) return true;

    $rules = array();
    foreach ($robotstxt as $line)
    {
        // Skip blank lines.
        if (!($line = trim($line))) continue;

        if (preg_match('/^\s*Disallow:(.*)/i', $line, $regs))
        {
            // An empty "Disallow:" implies full access - no further tests required.
            if (!trim($regs[1])) return true;
            $rules[] = preg_quote(trim($regs[1]), '/');
        }
    }

    // The page is disallowed if its path starts with any Disallow rule.
    foreach ($rules as $rule)
        if (preg_match("/^$rule/", $path)) return false;

    return true;
}
// Example: fetch a page only if robots.txt permits it.
$url = "https://www.website.com/order.php";
$robotstxt = get_robots_txt($url);
if (robots_allowed_crawl($url, $robotstxt))
{
    $html = file_get_contents($url);
}
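// A minimal sketch of exercising the parser without a network call, assuming a
// hand-built array of robots.txt lines (the same shape file() returns) is passed
// in directly. The $sample_robotstxt name and its rules are illustrative only:
// with them, /order.php is disallowed while /index.php is not.
$sample_robotstxt = array(
    "User-agent: *",
    "Disallow: /order.php",
    "Disallow: /admin/",
);
var_dump(robots_allowed_crawl("https://www.website.com/order.php", $sample_robotstxt)); // bool(false)
var_dump(robots_allowed_crawl("https://www.website.com/index.php", $sample_robotstxt)); // bool(true)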