// cURL helper:
/**
 * Fetch a URL with cURL and return the response body.
 *
 * Redirects are followed, HTTP status codes >= 400 are treated as failures
 * (CURLOPT_FAILONERROR) and the whole transfer is capped at 50 seconds.
 *
 * @param string $irc_server URL to fetch (despite the name, it is used as a plain URL).
 * @return string The response body on success, or a message of the form
 *                "cURL Error (<errno>): <message>\n" on failure.
 */
function Visit($irc_server){
// HTTP_USER_AGENT is only populated for web requests; fall back to a generic
// agent so the function also works from the command line without a notice.
$user_agent = isset($_SERVER['HTTP_USER_AGENT'])
? $_SERVER['HTTP_USER_AGENT']
: 'Mozilla/5.0 (compatible; PHP cURL)';

$ch = curl_init(); // initialize curl handle
if ($ch === false) {
return "cURL Error (0): unable to initialise cURL handle\n";
}

// No CURLOPT_PORT here: forcing port 80 overrides the port implied by the
// URL and breaks https:// and explicit-port URLs. cURL derives the port
// from the URL on its own.
curl_setopt_array($ch, array(
CURLOPT_URL => $irc_server,
// NOTE(review): peer verification is disabled, so TLS certificates are not
// checked. Kept for backward compatibility; enable it once a CA bundle is
// known to be available on the host.
CURLOPT_SSL_VERIFYPEER => false,
CURLOPT_FAILONERROR => true,
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_TIMEOUT => 50,
CURLOPT_USERAGENT => $user_agent,
));

$data = curl_exec($ch);
$curl_errno = curl_errno($ch);
$curl_error = curl_error($ch);
curl_close($ch);

// curl_exec() returns false on failure; report it in the same textual
// format callers already receive for cURL errors.
if ($data === false || $curl_errno > 0) {
return "cURL Error ($curl_errno): $curl_error\n";
}

return $data;
}
// URL crawler:
/**
 * Crawl a site starting at $url using MyCrawler.
 *
 * Nothing is returned; whatever output is produced comes from the crawler's
 * own callbacks while go() runs.
 *
 * @param string $url Start URL handed to the crawler via setURL().
 * @return void
 */
function tenta($url){
$crawler = new MyCrawler();
$crawler->setURL($url);
// Rule strings are passed to the crawler library unchanged; presumably the
// first limits received content to text/html and the second skips image
// URLs -- confirm against the PHPCrawler documentation.
$crawler->addContentTypeReceiveRule("#text/html#");
$crawler->addURLFilterRule("#\.(jpg|jpeg|gif|png)$# i");
$crawler->enableCookieHandling(true);
// 1000 * 1024 -- presumably a byte count (about 1 MB); verify the unit.
$crawler->setTrafficLimit(1000 * 1024);
$crawler->go();
// The process report and line-break string that used to be computed here
// were never used or returned, so they have been dropped.
}
// Allow the script to run for up to 110000 seconds; a crawl can take far
// longer than PHP's default execution time limit.
set_time_limit(110000);
// Load the crawler library; presumably this file defines the PHPCrawler base
// class that MyCrawler extends below -- path is resolved like any include().
include("libs/PHPCrawler.class.php");
/**
 * Crawler subclass that reports every handled document and flags pages whose
 * content contains one of the search words configured in the global $find.
 */
class MyCrawler extends PHPCrawler
{
/**
 * Print a short report for one document and highlight matching search words.
 *
 * Presumably invoked by the PHPCrawler base class for each document it
 * processes -- confirm against the library documentation.
 *
 * @param object $DocInfo Document info object; this method reads its url,
 *                        http_status_code, referer_url, received and
 *                        bytes_received properties.
 * @return void
 */
public function handleDocumentInfo($DocInfo)
{
global $find;

// Plain newline on the command line, HTML line break otherwise.
$lb = (PHP_SAPI == "cli") ? "\n" : "<br />";

echo "Page requested: ".$DocInfo->url." (".$DocInfo->http_status_code.")".$lb;

if (is_array($find) && count($find) > 0) {
// Fetch the page once per document instead of once per search word.
$body = (string) Visit($DocInfo->url);
// The URL comes from crawled pages, so escape it before embedding it in markup.
$safe_url = htmlspecialchars($DocInfo->url, ENT_QUOTES, 'UTF-8');

foreach ($find as $matche) {
if (!isset($matche['word'])) {
continue;
}
// Quote the word so regex metacharacters in it are matched literally.
$pattern = '/'.preg_quote($matche['word'], '/').'/i';
if (preg_match($pattern, $body)) {
echo "<a href=\"".$safe_url."\" target=\"_blank\">".$safe_url."</a><b style='color:red;'>".$matche['word']."</b>".$lb;
}
}
}

echo "Referer-page: ".$DocInfo->referer_url.$lb;

if ($DocInfo->received == true) {
echo "Content received: ".$DocInfo->bytes_received." bytes".$lb;
} else {
echo "Content not received".$lb;
}

echo $lb;
// Push the output to the client right away so progress is visible during long crawls.
flush();
}
}
.
// Sites to crawl. Each entry has an id, a display name and the start URL;
// "description" is optional and only present on some entries.
$url = [
[
"id" => 7,
"name" => "soltechit",
"url" => "soltechit.co.uk",
],
[
"id" => 5,
"name" => "CNN",
"url" => "cnn.com",
"description" => "A social utility that connects people, to keep up with friends, upload photos, share links",
],
];
// Strings we are looking for:
// Search words, each wrapped as array("word" => ...), which is the shape the
// crawler callback reads via the global $find.
$find = array_map(
function ($word) {
return array("word" => $word);
},
array(
"routers",
"Moose",
"worm",
"kenya",
"alshabaab",
"ISIS",
"security",
"windows 10 release",
"hacked",
)
);
// Crawl every configured site, printing a heading and optional description first.
foreach ($url as $site) {
echo '<h2>'.$site['name'].'</h2>';
// "description" is optional; fall back to an empty string instead of
// triggering an undefined-index notice.
$description = isset($site['description']) ? $site['description'] : '';
echo $description.'<br>';
// tenta() returns nothing and prints its own progress, so it is called for
// its side effects only. A separate loop variable keeps the $url list
// intact instead of overwriting it with a single URL string.
tenta($site['url']);
echo '<br>';
}