采集京东商城的正则

Published: January 24, 2012 | Category: PHP, 正则表达式 | No Comments

// Scrape a product page whose URL is posted by the client and build $exdata,
// an associative array holding the product title, price, picture URL,
// creation timestamp, numeric product id and the description HTML.
//
// Every pattern is applied to the raw page bytes, so the literal text in the
// price pattern must be stored in the same encoding as the fetched page.

// Accept only a string URL; anything else is treated as "no URL given".
$url = (isset($_POST["url"]) && is_string($_POST["url"])) ? $_POST["url"] : "";

// The URL is untrusted input: only fetch http(s) so that local paths and
// other stream wrappers (file://, php://, ...) cannot be read through here.
// NOTE(review): internal http hosts are still reachable; consider an
// allow-list of shop host names if this is exposed publicly.
$contents_old = "";
if (preg_match("|^https?://|i", $url)) {
	$contents_old = file_get_contents($url);
	if ($contents_old === false) {
		// Fetch failed: continue with an empty page so every field ends up empty.
		$contents_old = "";
	}
}

// The product id is the first run of at least four digits in the URL.
preg_match("/\d{3}\d+/", $url , $buyid);
preg_match("|<h1>(.*)</h1>|U", $contents_old, $jobname);
preg_match("|<img onerror(.*)>|U", $contents_old, $jobpic);

// Pull the src attribute out of the product <img> tag, if one was found.
$pic = array();
if (isset($jobpic[0])) {
	preg_match("|src=\"(.*)\"|U", $jobpic[0], $pic);
}

preg_match("|京东价:¥(.*)。|U", $contents_old, $price);
// "[^>]*" lets the tag carry zero or more attributes; the previous "[^>]"
// demanded exactly one character and therefore missed plain <li> and
// <li class="..."> elements. "\b" keeps <link ...> from matching.
preg_match_all("|<li\b[^>]*>(.*)</li>|U", $contents_old, $extra);
preg_match_all("|<h2>(.*)</h2>|U", $contents_old, $contitle);
preg_match_all("|<div class=\"con\">([\s\S]*)</div>|U", $contents_old, $conarray);

$title = isset($jobname[1]) ? trim(strip_tags($jobname[1])) : "";

// Collect the <li> items that contain no link. Each item starts with "<li",
// so "<a" can never sit at offset 0; the old "== 0" test only passed when
// strpos() returned false, which is now spelled out.
$ex = "";
foreach ($extra[0] as $exone) {
	if (strpos($exone, "<a") === false) {
		$ex .= $exone;
	}
}

// Pair each content <div> with the <h2> heading at the same position; a
// missing heading contributes nothing.
$constr = "";
foreach ($conarray[1] as $id => $conone) {
	$heading = isset($contitle[0][$id]) ? $contitle[0][$id] : "";
	$constr .= $heading.$conone;
}
$content = $constr;

// Fields whose pattern did not match are null rather than raising notices.
// NOTE(review): mysql_real_escape_string() belongs to ext/mysql, which was
// removed in PHP 7.0, and only 'content' is escaped here. The code that
// stores $exdata is not visible; prefer prepared statements (PDO/mysqli)
// there and confirm how the other fields are handled.
$exdata = array(
	'title' => $title,
	'price' => isset($price[1]) ? $price[1] : null,
	'picurl' => isset($pic[1]) ? $pic[1] : null,
	'creattime' =>  time(),
	'buyid' => isset($buyid[0]) ? $buyid[0] : null,
	'content' => mysql_real_escape_string($content)
);

Tags: none

Related Posts:
  • [尚无相关文章]

Leave a Comment