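# I wrote this program to count the number of http links in the text of each
# comment, but running it under Python keeps hitting a problem intermittently:
# sometimes it runs fine and sometimes it raises an error. Could anyone help
# me fix it? Thanks.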
from __future__ import division
import json
import codecs
import sys
import string
import os,random
import re
import shutil
import jieba
import jieba.analyse
reload(sys)
sys.setdefaultencoding("utf-8")
class Comment(object):
    """One comment record: an id, an ``mid`` and the comment text.

    The three values are stored exactly as given; no conversion or
    validation is performed.
    """

    def __init__(self, id, mid, text):
        """Store the three fields of the record.

        Args:
            id: Value kept in ``self.id`` (the parameter name shadows the
                builtin, but is kept so existing callers keep working).
            mid: Value kept in ``self.mid``.
            text: Value kept in ``self.text``.
        """
        self.id = id
        self.mid = mid
        self.text = text

    def getid(self):
        """Return the stored ``id``."""
        return self.id

    def getmid(self):
        """Return the stored ``mid``."""
        return self.mid

    def gettext(self):
        """Return the stored ``text``."""
        return self.text
def readallcomments(inputfile):
    """Read a tab-separated comment file into a list of ``Comment`` objects.

    Each line is split on tab characters; the first three fields become the
    ``id``, ``mid`` and ``text`` of one ``Comment``. Fields after the third
    are ignored. The text field is stored as read, so it still carries the
    line terminator when it is the last field on the line.

    Lines with fewer than three fields (blank lines, truncated records) are
    skipped instead of raising ``IndexError``.

    Args:
        inputfile: Path of the file to read.

    Returns:
        A list of ``Comment`` objects in file order (empty for an empty file).

    Raises:
        IOError: If ``inputfile`` cannot be opened.
    """
    AllComments = []
    # ``with`` guarantees the handle is closed even if reading fails midway.
    with open(inputfile) as commentFile:
        # Iterate lazily instead of loading the whole file via readlines().
        for line in commentFile:
            sep = line.split('\t')
            if len(sep) < 3:
                # Malformed or blank line: there is no text field to use.
                continue
            AllComments.append(Comment(sep[0], sep[1], sep[2]))
    return AllComments
# Compiled once at import time instead of on every comment. The pattern text
# is unchanged; the raw-string prefix only avoids relying on invalid string
# escapes such as "\:" and "\=".
_HTTP_LINK_RE = re.compile(r"(?isu)(http\://[a-zA-Z0-9\.\?/&\=\:]+)")


def count_http_links(text):
    """Return how many ``http://`` links the pattern finds in *text*.

    Matching is case-insensitive. Only the literal scheme ``http://`` is
    recognised, so ``https://`` links are not counted.
    """
    return len(_HTTP_LINK_RE.findall(text))


def main():
    """Write a per-comment link count file for every name in the list file.

    For each non-blank line ``name`` of the list file, the comments in
    ``<rumorcommentpath><name>.txt`` are loaded with ``readallcomments`` and
    one ``id<TAB>mid<TAB>count`` line per comment is written to
    ``<rumorhttppath><name>.txt``.
    """
    rumorcommentpath = 'E:\\project\\gerenjieshao\\'
    # Same runtime value as the original literal, which relied on the
    # invalid escape "\d" being passed through unchanged.
    rumorhttppath = 'E:\\project\\result\\djym\\'

    # NOTE(review): this is a raw string, so the doubled backslashes are
    # literal. Kept byte-for-byte; confirm that this is the intended path.
    with open(r'E:\\project\\myOutput.txt') as namefile:
        for line in namefile:
            # strip() also drops a stray "\r" or spaces that would otherwise
            # end up inside the file name; blank lines are skipped.
            name = line.strip()
            if not name:
                continue

            inputfile = rumorcommentpath + name + '.txt'
            # Load the input first so a missing or unreadable input file does
            # not leave an empty output file behind.
            allcomments = readallcomments(inputfile)

            with open(os.path.join(rumorhttppath, name + '.txt'), 'w') as outfile:
                for comment in allcomments:
                    # The file is opened in text mode, which already
                    # translates "\n" to the platform line ending; writing
                    # "\r\n" here would produce "\r\r\n" on Windows.
                    outfile.write(
                        str(comment.getid()) + '\t'
                        + str(comment.getmid()) + '\t'
                        + str(count_http_links(comment.gettext())) + '\n'
                    )
            # Parenthesised single argument prints identically on Python 2
            # and Python 3.
            print('processing...' + name)


if __name__ == '__main__':
    main()