面馆师傅 2023-05-05 16:54 采纳率: 73.7%
浏览 56
已结题

电影网站采集修改地址

下面这段电影采集代码里的采集地址已失效。比如我现在想采集 https://www.kanjuwang.net/detail/?241517.html 这个站点的影片,代码中的采集地址和匹配规则需要怎么修改?


<?php
/*
 * Access check for the collector page.
 *
 * Reads the manager name from the "managername" cookie, looks the manager up
 * in the `<prefix>manager` table and refuses access when the visitor is not
 * logged in, is unknown, or has a level greater than 1.
 *
 * NOTE(review): the cookie value alone identifies the manager; anyone who can
 * set that cookie to an existing manager name passes this check. A server-side
 * session should replace it — that needs changes outside this file.
 */
header("Content-type:text/html;charset=utf-8");
include '../inc/config.php';

$managername = isset($_COOKIE["managername"]) ? (string) $_COOKIE["managername"] : '';
if (!$managername) {
    echo "<script>alert('请登陆!'); parent.window.location.href = 'login.php';</script>";
    // Stop here: without this the collector below would still run for anonymous visitors.
    exit;
}

// Exact match on an escaped value: LIKE would let a cookie of "%" match any manager.
$sql = "SELECT * FROM `" . $mysql_pre_name . "manager` WHERE `m_name` = '" . mysql_real_escape_string($managername) . "' LIMIT 0, 1 ";
$check_query = mysql_query($sql);
$result = $check_query ? mysql_fetch_array($check_query) : false;
if (!$result) {
    // Unknown manager (or failed query): treat as not logged in instead of
    // falling through with an empty level, which would pass the check below.
    echo "<script>alert('请登陆!'); parent.window.location.href = 'login.php';</script>";
    exit;
}

$managerlevel = (int) $result['m_level'];
// Refresh the login cookie for another hour.
setcookie("managername", $managername, time() + 3600);
if ($managerlevel > 1) {
    echo "<script>alert('您的管理权限为".$managerlevel.",无权进行此操作!'); parent.window.location.href = './';</script>";
    exit;
}
?>
<?php
// Control buttons: stop, start/continue at the current position, restart from scratch.
// Request values are reduced to integers and the self URL is JSON- and HTML-encoded
// before being written into the onclick handlers, so nothing from the request can
// break out of the attribute or the JavaScript string.
$btnPage = isset($_GET['page']) ? (int) $_GET['page'] : 0;
$btnNum  = isset($_GET['num']) ? (int) $_GET['num'] : 0;
$btnSelf = htmlspecialchars(
    json_encode('http://' . $_SERVER['SERVER_NAME'] . $_SERVER["SCRIPT_NAME"]),
    ENT_QUOTES,
    'UTF-8'
);
?>
<input onclick="window.location.href='?page=<?=$btnPage?>&num=<?=$btnNum?>&exit=1'" type="submit" value="停止采集" />
<input onclick="window.location.href='?page=<?=$btnPage?>&num=<?=$btnNum?>'" type="submit" value="开始采集" />
<input onclick="window.location.href=<?=$btnSelf?>" type="submit" value="从头采集" />
<br /><br />
<?php
// Request parameters:
//   exit=1 : stop immediately (the "stop" button).
//   page   : 1-based listing page to collect from; defaults to 1.
//   num    : 0-based index of the movie on that listing page; defaults to 0.
// Both values are forced to integers so they can be embedded safely in URLs
// and in the generated redirect scripts further down.
if (isset($_GET['exit']) && $_GET['exit'] == 1) {
    exit;
}

$page = isset($_GET['page']) ? (int) $_GET['page'] : 0;
if ($page < 1) {
    $page = 1;
}

$num = isset($_GET['num']) ? (int) $_GET['num'] : 0;
if ($num < 0) {
    $num = 0;
}

// Only listing pages 1-4 are collected.
if ($page > 4) {
    echo '采集完成';
    exit;
}

// Value stored in the `d_type` column for every collected movie.
$type = 10;

// Every field consumed further down starts out empty, so a failed download or
// a layout change leads to the "empty record" path instead of PHP notices.
$urlds     = array();
$tag       = '';
$content   = '';
$pic       = '';
$starring  = '';
$directed  = '';
$from      = '';
$title     = '';
$play_url  = '';
$playurl   = '';

// 1) Listing page: collect the ids of all movie detail pages linked from it.
$yurl = "http://www.iqiyi.com/lib/dianying/,,_11_" . (int) $page . ".html";
$purl = file_get_contents($yurl);
if ($purl !== false && preg_match_all('/class="site-piclist_info_title">(.*)<\/p>/imsU', $purl, $href1)) {
    foreach ($href1[1] as $vh) {
        if (preg_match_all('/http:\/\/www\.iqiyi\.com\/lib\/m_(.*)\.html/imsU', $vh, $href)) {
            foreach ($href[1] as $khr) {
                $urlds[] = $khr;
            }
        }
    }
}

// 2) Detail page of the movie at position $num (skipped when the listing has no such entry).
$urld = '';
if (isset($urlds[$num])) {
    $nums = $urlds[$num];
    $detail = file_get_contents("http://www.iqiyi.com/lib/m_" . $nums . ".html");
    if ($detail !== false) {
        $urld = $detail;
    }
}

if ($urld !== '') {
    // Starring: the page separates names with "、"; they are stored comma-separated.
    if (preg_match('/主演:(.*);/imsU', $urld, $starrings)) {
        $starring = str_ireplace("、", ",", $starrings[1]);
    }
    // Director.
    if (preg_match('/导演:(.*);/imsU', $urld, $directeds)) {
        $directed = $directeds[1];
    }
    // Tags: link texts inside the "look_point" block, stripped of commas and
    // line breaks, then joined with commas.
    if (preg_match('/<div class="look_point">(.*)<\/div>/imsU', $urld, $tags)
        && preg_match_all('/>(.*)<\/a>/imsU', $tags[1], $tagss)) {
        $cleanTags = array();
        foreach ($tagss[1] as $kat) {
            $cleanTags[] = str_replace(array(",", "\n", "\r"), "", $kat);
        }
        $tag = rtrim(implode(',', $cleanTags), ',');
    }
    // Description.
    if (preg_match('/data-movlbshowmore-ele="whole">(.*)<\/p>/imsU', $urld, $contents)) {
        $content = $contents[1];
    }
    // Poster image: first <img src="..."> inside the "result_pic" block.
    if (preg_match('/<div class="result_pic">(.*)<\/div>/imsU', $urld, $pic1)
        && preg_match('/<img(.*)src="(.*)"/imsU', $pic1[1], $pic2)) {
        $pic = $pic2[2];
    }

    // 3) Play sources: queried from the search endpoint using the page's data-doc-id.
    if (preg_match('/data-doc-id="(.*)"/imsU', $urld, $tid) && $tid[1] !== '') {
        $domin = file_get_contents('http://search.video.iqiyi.com/m?if=video_library&video_library_type=play_source&platform=1&key=' . $tid[1]);
        $json_domin = ($domin !== false) ? json_decode($domin, true) : null;
        if (is_array($json_domin)) {
            if (isset($json_domin['site'])) {
                $from = $json_domin['site'];
            }
            $info = (isset($json_domin['video_info']) && is_array($json_domin['video_info']))
                ? $json_domin['video_info']
                : array();
            foreach ($info as $karr) {
                // The title of the last entry is the one stored for the movie.
                $title = isset($karr['title']) ? $karr['title'] : '';
                // Drop the query string from the play URL.
                $urlParts = explode('?', isset($karr['play_url']) ? $karr['play_url'] : '');
                $play_url = $urlParts[0];
                // Play list format: "title$url#" per entry.
                $playurl .= $title . '$' . $play_url . "#";
            }
        }
    }
}
// Store the collected movie and schedule the next collection step.
//
// Every collected field is normalised to a string first, so this section also
// works when an earlier step produced nothing for it.
$title    = isset($title) ? (string) $title : '';
$tag      = isset($tag) ? (string) $tag : '';
$pic      = isset($pic) ? (string) $pic : '';
$starring = isset($starring) ? (string) $starring : '';
$directed = isset($directed) ? (string) $directed : '';
$from     = isset($from) ? (string) $from : '';
$playurl  = isset($playurl) ? (string) $playurl : '';
$play_url = isset($play_url) ? (string) $play_url : '';
$content  = isset($content) ? (string) $content : '';
$page     = (int) $page;
$num      = (int) $num;

// HTML-escaped copies for display: the values come from a remote page and must
// not be able to inject markup into this admin page.
$h = array();
foreach (array(
    'title'    => $title,
    'tag'      => $tag,
    'pic'      => $pic,
    'starring' => $starring,
    'directed' => $directed,
    'from'     => $from,
    'playurl'  => $playurl,
) as $hKey => $hValue) {
    $h[$hKey] = htmlspecialchars($hValue, ENT_QUOTES, 'UTF-8');
}

$group = "
标题:{$h['title']}<br/>
标签:{$h['tag']}<br/>
图片:{$h['pic']}<br/>
主演:{$h['starring']}<br/>
导演:{$h['directed']}<br/>
来源:{$h['from']}<br/>
播放地址:{$h['playurl']}<br/><br/>
";

// Next step: the following movie on this page, or the first movie of the next
// page once the index has passed 28.
// NOTE(review): the messages below say "3 seconds" but the timer is 1000 ms.
if ($num > 28) {
    $nextUrl = '?page=' . ($page + 1) . '&num=0';
} else {
    $nextUrl = '?page=' . $page . '&num=' . ($num + 1);
}
$next = '<script>window.setTimeout("window.location=\'' . $nextUrl . '\'",1000); </script>';

// Reject incomplete records before touching the database, so an existing row
// never gets its play URL overwritten with an empty value.
if ($play_url === '' || $title === '' || $from === '' || $pic === '') {
    echo $group . '电影《' . $h['title'] . '》播放地址为空,入库失败,3秒后继续' . $next;
    exit;
}

// SQL-escaped copies: titles and descriptions routinely contain quotes.
$q = array();
foreach (array(
    'title'    => $title,
    'tag'      => $tag,
    'pic'      => $pic,
    'starring' => $starring,
    'directed' => $directed,
    'from'     => $from,
    'playurl'  => $playurl,
    'content'  => $content,
    'manager'  => isset($managername) ? (string) $managername : '',
) as $qKey => $qValue) {
    $q[$qKey] = mysql_real_escape_string($qValue);
}
$typeId = (int) $type;

// Is there already a movie with this name in this category?
$sql = "SELECT * FROM `" . $mysql_pre_name . "vod` WHERE `d_name` = '" . $q['title'] . "' AND `d_type` = " . $typeId;
$query = mysql_query($sql);
$row = $query ? mysql_fetch_array($query) : false;
$isNew = !$row || !$row['d_name'];

if ($isNew) {
    $sqlrk = "INSERT INTO `" . $mysql_pre_name . "vod` (`d_name`, `d_pic`, `d_picthumb`, `d_picslide`, `d_writer`, `d_starring`, `d_directed`, `d_tag`, `d_remarks`, `d_type`, `d_level`, `d_usergroup`, `d_addtime`, `d_content`, `d_playfrom`, `d_playurl`, `d_reading`) VALUES ('" . $q['title'] . "', '" . $q['pic'] . "', '', '', '" . $q['manager'] . "', '" . $q['starring'] . "', '" . $q['directed'] . "', '" . $q['tag'] . "', '', '" . $typeId . "', '0', '0', NOW(), '" . $q['content'] . "', '" . $q['from'] . "', '" . $q['playurl'] . "', '152');";
    $result = mysql_query($sqlrk);
    if ($result) {
        echo $group . '电影《' . $h['title'] . '》采集入库成功,3秒后继续' . $next;
    } else {
        echo $group . '电影《' . $h['title'] . '》采集出现错误,入库失败,3秒后继续' . $next;
    }
} else {
    // Existing movie: only refresh its play URL and timestamp.
    $did = (int) $row['d_id'];
    $sqlrk = "UPDATE `$mysql_database`.`" . $mysql_pre_name . "vod` SET `d_playurl` = '" . $q['playurl'] . "', `d_addtime` = NOW() WHERE `" . $mysql_pre_name . "vod`.`d_id` = " . $did . ";";
    $result = mysql_query($sqlrk);
    if ($result) {
        echo $group . '电影《' . $h['title'] . '》已存在,无需采集,直接覆盖播放地址,3秒后继续' . $next;
    } else {
        echo $group . '电影《' . $h['title'] . '》采集出现错误,入库失败,3秒后继续' . $next;
    }
}

?>

  • 写回答

4条回答 默认 最新

  • EdsionWang 2023-05-05 17:11
    关注

    爬虫程序是需要根据你的需求来编写的。你给的例子里,应该是一个电影列表,通过循环把每一页的电影信息都爬取出来。而你问题中给的url是一个电影的详细页,通过正则表达式来获取你的信息。

    评论

报告相同问题?

问题事件

  • 系统已结题 5月13日
  • 创建了问题 5月5日

悬赏问题

  • ¥15 阿里云函数计算自定义层部署LibreOffice
  • ¥15 打开软件提示错误:failed to get wglChoosePixelFormatARB
  • ¥30 电脑误删了手机的照片怎么恢复?
  • ¥15 (标签-python|关键词-char)
  • ¥15 python+selenium,在新增时弹出了一个输入框
  • ¥15 苹果验机结果的api接口哪里有??单次调用1毛钱及以下。
  • ¥20 学生成绩管理系统设计
  • ¥15 来一个cc穿盾脚本开发者
  • ¥15 CST2023安装报错
  • ¥15 使用diffusionbert生成文字 结果是PAD和UNK怎么办