example_scraping_digg.php
1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
<?php
include_once('../../Util_SimpleHtmlDom.class.php');
function scraping_digg() {
// create HTML DOM
$html = file_get_html('http://digg.com/');
// get news block
foreach($html->find('div.news-summary') as $article) {
// get title
$item['title'] = trim($article->find('h3', 0)->plaintext);
// get details
$item['details'] = trim($article->find('p', 0)->plaintext);
// get intro
$item['diggs'] = trim($article->find('li a strong', 0)->plaintext);
$ret[] = $item;
}
// clean up memory
$html->clear();
unset($html);
return $ret;
}
// -----------------------------------------------------------------------------
// test it!
// "http://digg.com" will check user_agent header...
ini_set('user_agent', 'My-Application/2.5');
$ret = scraping_digg();
foreach($ret as $v) {
echo $v['title'].'<br>';
echo '<ul>';
echo '<li>'.$v['details'].'</li>';
echo '<li>Diggs: '.$v['diggs'].'</li>';
echo '</ul>';
}
?>