我在用 C 语言写爬虫(刚入坑),想用正则表达式来判断 URL。结果发现复杂的表达式(在其他正则表达式测试工具中可以正常匹配)无法匹配出来;换成简单的表达式测试了一下(下面代码中用的就是简单的表达式),可以匹配出来。复杂的:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include<string.h>
#include<regex.h>
#define MAX 1024
/*
 * Copy the half-open byte range [start, end) of str into a static buffer
 * and return it as a NUL-terminated string.
 *
 * str   - source string; str + start must lie within it.
 * start - index of the first byte to copy.
 * end   - index one past the last byte to copy.
 *
 * Returns a pointer to a static 256-byte buffer that is overwritten by the
 * next call, so the caller must use or copy the result before calling again.
 * Results longer than 255 bytes are truncated; end <= start yields "".
 */
char* substr(const char*str, unsigned start, unsigned end)
{
    static char stbuf[256];
    size_t n = 0;

    /* Guard against unsigned wrap-around when end < start. */
    if (end > start) {
        n = (size_t)(end - start);
    }
    /* Never write past the static buffer; keep one byte for the terminator. */
    if (n > sizeof stbuf - 1) {
        n = sizeof stbuf - 1;
    }

    /* strncpy stops reading at the source's NUL, so a short str is safe. */
    strncpy(stbuf, str + start, n);
    /* Terminate right after the copied bytes (index n, not n + 1). */
    stbuf[n] = '\0';
    return stbuf;
}
/*
 * Read /home/wrj/3 line by line and print the first substring of each line
 * that matches the URL pattern.
 *
 * Returns 0 on success, EXIT_FAILURE if the pattern cannot be compiled or
 * the input file cannot be opened.
 */
int main(void)
{
    /*
     * POSIX regex has no \d, \w or non-greedy quantifiers. ".*" is greedy,
     * so the match runs to the LAST double quote on the line.
     */
    const char *pattern = "http://.*\"";
    regex_t reg;
    regmatch_t pm[1];
    char buf[MAX];
    FILE *fp;
    int rc;

    /*
     * REG_EXTENDED selects POSIX extended syntax, where + ? | ( ) { } work
     * unescaped; without it the pattern is parsed as a basic regex, which
     * is a common reason "complex" patterns fail. This pattern has the same
     * meaning in both syntaxes.
     */
    rc = regcomp(&reg, pattern, REG_EXTENDED | REG_ICASE);
    if (rc != 0) {
        char errbuf[128];

        regerror(rc, &reg, errbuf, sizeof errbuf);
        fprintf(stderr, "regcomp error: %s\n", errbuf);
        /* An uncompiled regex_t must not be passed to regexec. */
        return EXIT_FAILURE;
    }

    fp = fopen("/home/wrj/3", "r");
    if (fp == NULL) {
        printf("Fopen error!\n");
        regfree(&reg);
        return EXIT_FAILURE;
    }

    while (fgets(buf, sizeof buf, fp)) {
        if (regexec(&reg, buf, 1, pm, 0) != 0) {
            continue;
        }
        /*
         * Print the matched range straight from buf with a precision, so
         * long matches are neither truncated nor copied through a small
         * intermediate buffer.
         */
        printf("%.*s\n", (int)(pm[0].rm_eo - pm[0].rm_so),
               buf + pm[0].rm_so);
    }

    fclose(fp);
    regfree(&reg);
    return 0;
}
这哪里有错呢?我已经注意到转义了……还是说这是 regex.h 的问题?