Month: August 2013

Scraping data from a webpage using PHP, cURL and the DOM object, and inserting it into a database

<?php
// Lift PHP's execution time limit so a slow download or a long insert loop
// is not cut off part-way through.
set_time_limit(0);
// The file name must be quoted with plain ASCII quotes; typographic quotes
// are not string delimiters in PHP.
// NOTE(review): nothing in the visible script calls into this library (the
// parsing is done with DOMDocument/DOMXPath) — confirm it is still needed.
require_once 'simple_html_dom.php';
    // Defining the basic cURL function
    function curl($url) {
        $ch = curl_init();  // Initialising cURL
        curl_setopt($ch, CURLOPT_URL, $url);    // Setting cURL’s URL option with the $url variable passed into the function
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE); // Setting cURL’s option to return the webpage data
        $data = curl_exec($ch); // Executing the cURL request and assigning the returned data to the $data variable
        curl_close($ch);    // Closing cURL
        return $data;   // Returning the data from the function
    }
    $scraped_website = curl(“http://www.yellowpages.com/los-angeles-ca/doctors?g=los+angeles%2Cca&#8221;);
    
    $doc = new DOMDocument();
  $data=@$doc->loadHTML($scraped_website);
  //print_r($data);
  $achr=$doc->getElementById(‘search-results’);
 
 // echo “<pre>”;
// print_r($achr);
$xpath = new DomXpath($doc);
$div = $xpath->query(‘//*[@class=”street-address”]’);
 for ($i = $div->length – 1; $i > -1; $i–) {
        $result[] = $div->item($i)->textContent;

    }

   // echo “<pre>”;
    //print_r($result);
    
$di = $xpath->query(‘//*[@class=”business-phone phone”]’);
 for ($i = $di->length – 1; $i > -1; $i–) {
        $re[] = $di->item($i)->textContent;
    }

   // echo “<pre>”;
    //print_r($re);    
    
$ca = $xpath->query(‘//*[@class=”city-state”]’);
 for ($i = $ca->length – 1; $i > -1; $i–) {
        $c[] = $ca->item($i)->textContent;
    }

   // echo “<pre>”;
   // print_r($c);
    
$tit = $xpath->query(‘//*[@class=”srp-business-name”]’);
 for ($i = $tit->length – 1; $i > -1; $i–) {
        $t[] = $tit->item($i)->textContent;
    }

    //echo “<pre>”;
    print_r($t);
    
$conn=mysql_connect(“localhost”,”root”,””);
mysql_select_db(‘scrapping’,$conn);
for($i=0;$i<count($t); $i++)
{
$query=”Insert into search_results values(”,'”.addslashes($result[$i]).”‘,'”.addslashes($re[$i]).”‘,'”.addslashes($c[$i]).”‘,'”.addslashes($t[$i]).”‘)”;
mysql_query($query) or die(mysql_error());
}
    exit();
?>