php正则获取网页关键词keywords,description,title

 
更多
//php正则获取网页关键词keywords
/**
 * Extract the content of <meta name="keywords" ...> from an HTML document
 * and normalise it into a comma-separated, addslashes()-escaped string.
 *
 * Only the <head>...</head> section is searched. When there is no <head>
 * or no keywords meta tag, the placeholder "无" is used as the value.
 *
 * Relies on replace_word() and textcut(), which are defined outside this
 * block (presumably textcut() truncates to the given length - confirm).
 *
 * @param string $html Raw HTML of the page.
 * @return string Lower-cased, comma-separated keywords, escaped with addslashes().
 */
function get_keywords($html)
{
    // Kept from the original: the whole document is lower-cased, so the
    // returned keywords are lower-case as well.
    $html = strtolower($html);

    $keywords = '';
    // Check preg_match()'s return value instead of isset($res): the matches
    // array is always set after the call, even when nothing matched.
    if (preg_match('@<head[^>]*>(.*?)</head>@si', $html, $regs)) {
        // Single-quoted PHP string so the " and ' inside the character
        // classes no longer terminate the literal (the original was a parse error).
        // NOTE(review): for a single-quoted attribute value the closing ' is
        // still captured, because the class only excludes <, > and ".
        $pattern = '/<meta +name *=["\']?keywords["\']? *content=["\']?([^<>"]+)["\']?/i';
        if (preg_match($pattern, $regs[1], $res)) {
            $keywords = $res[1];
        }
    }

    if ($keywords === '') {
        $keywords = "无";
    }

    $keywords = replace_word(textcut($keywords, 250));

    // Turn every supported separator into an ASCII comma. The original
    // replaced "," with "," (a no-op); the full-width comma "," is
    // presumably what was intended - confirm.
    $keywords = str_replace(array("-", ",", " ", "|", "、"), ",", $keywords);

    // Collapse runs of commas of any length; a single ",," -> "," pass
    // left doubled commas behind (e.g. "a - b" became "a,,b").
    $keywords = preg_replace('/,{2,}/', ',', $keywords);

    // Drop any angle brackets that survived the helpers above.
    $keywords = str_replace(array("<", ">"), "", $keywords);

    return addslashes(trim($keywords));
}
 

//php正则获取网页标题,代码如下
 
/**
 * Extract the page title from an HTML document.
 *
 * The document is lower-cased, the text between "<title>" and "</title>"
 * is taken via cut(), " - " separators become commas, and the result is
 * passed through textcut(..., 80) and replace_word() before any remaining
 * tags are stripped. cut(), textcut() and replace_word() are defined
 * outside this block.
 *
 * @param string $html Raw HTML of the page.
 * @return string Trimmed title escaped with addslashes(); empty when no title was found.
 */
function get_title($html)
{
    $lowered = strtolower($html);
    $between = cut($lowered, "<title>", "</title>");
    $title = str_replace(" - ", ",", $between);

    // Each step only runs while the title is still truthy, so an empty
    // result short-circuits the remaining clean-up.
    if ($title) {
        $shortened = textcut($title, 80);
        $title = replace_word($shortened);

        if ($title) {
            // Remove anything that still looks like an HTML tag.
            $title = preg_replace("/<(.*?)>/", "", $title);
        }
    }

    $trimmed = trim($title);

    return addslashes($trimmed);
}
 

//php正则获取网页描述description,代码如下

/**
 * Extract the content of <meta name="description" ...> from an HTML document
 * and normalise it into a comma-separated, addslashes()-escaped string.
 *
 * Only the <head>...</head> section is searched. When there is no <head>
 * or no description meta tag, the placeholder "无" is used as the value.
 *
 * Relies on replace_word() and textcut(), which are defined outside this
 * block (presumably textcut() truncates to the given length - confirm).
 *
 * @param string $html Raw HTML of the page.
 * @return string Lower-cased description with separators turned into commas,
 *                escaped with addslashes().
 */
function get_description($html)
{
    // Kept from the original: the whole document is lower-cased, so the
    // returned description is lower-case as well.
    $html = strtolower($html);

    $description = '';
    // Check preg_match()'s return value instead of isset($res): the matches
    // array is always set after the call, even when nothing matched.
    if (preg_match('@<head[^>]*>(.*?)</head>@si', $html, $regs)) {
        // Single-quoted PHP string so the " and ' inside the character
        // classes no longer terminate the literal (the original was a parse error).
        // NOTE(review): for a single-quoted attribute value the closing ' is
        // still captured, because the class only excludes <, > and ".
        $pattern = '/<meta +name *=["\']?description["\']? *content=["\']?([^<>"]+)["\']?/i';
        if (preg_match($pattern, $regs[1], $res)) {
            $description = $res[1];
        }
    }

    if ($description === '') {
        $description = "无";
    }

    $description = replace_word(textcut($description, 250));

    // Turn every supported separator into an ASCII comma. The original
    // replaced "," with "," (a no-op); the full-width comma "," is
    // presumably what was intended - confirm.
    $description = str_replace(array("-", ",", " ", "|", "、"), ",", $description);

    // Collapse runs of commas of any length; a single ",," -> "," pass
    // left doubled commas behind (e.g. "a - b" became "a,,b").
    $description = preg_replace('/,{2,}/', ',', $description);

    // Drop any angle brackets that survived the helpers above.
    $description = str_replace(array("<", ">"), "", $description);

    return addslashes(trim($description));
}
打赏

本文固定链接: https://www.cxy163.net/archives/1029 | 绝缘体

该日志由 绝缘体.. 于 2016年03月21日 发表在 首页 分类下,
原创文章转载请注明: php正则获取网页关键词keywords,description,title | 绝缘体

抱歉!评论已关闭.