qq_32264525
qq_32264525
2017-10-20 10:56

为什么我的正则表达式截取网站源代码的图片没输出 而其他字符串又行?

  • 源代码
  • 正则表达式

public class DownImgUtil {

public static String htmlSource(String link,String encoding) {
    StringBuffer sb=new StringBuffer();
    InputStreamReader in=null;
    try {

        URL url=new URL(link);

        URLConnection uc=url.openConnection();
        //װ
        uc.setRequestProperty("User-Agent", "java");

        InputStream inputStream=uc.getInputStream();
        in=new InputStreamReader(inputStream,encoding);

        BufferedReader reader= new BufferedReader(in);

        String line =null;

        while((line=reader.readLine())!=null){
            sb.append(line+"\n");
        }

        } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }finally {
        try {
            in.close();
        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
    return sb.toString();

}
public static void getImg(String source) {
    String regex="<img[\\s]+src=\"(https://|http://)[^\"]+?.(jpg|png|gif)\">";
    Pattern p=Pattern.compile(regex);
    Matcher m=p.matcher(source);
    while(m.find()){
        //m.group().matches("(https://|http://)([\\w-]+\\.)+[\\w-]+(:[0-9]+)*(/[\\w-]+)*(/[\\w-]+\\.(jpg|JPG|png|PNG|gif|GIF)))");
    Pattern p1=Pattern.compile("(https://|http://)[^\"]+?.(jpg|png|gif)");
    Matcher m1=p1.matcher(m.group());
    while(m1.find()){
        System.out.println(m1.group());
    }

    }

}

public static void main(String[] args) {
String sb=htmlSource("http://foxue.qq.com/", "gbk");
String sa="adsaxzxsx";
getImg(sa);

}
}

  • 点赞
  • 回答
  • 收藏
  • 复制链接分享

0条回答