老天哥
2017-07-19 15:45
采纳率: 100%
浏览 4.0k
已采纳

python怎么把jieba分词后的结果导入excel中啊

# -*- coding:UTF-8 -*-

import urllib2
import re
import jieba
import jieba.analyse
import sys
# Python 2 idiom: re-import sys, then switch the interpreter-wide default
# string encoding to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')
# NOTE(review): xlwt, open_workbook and copy are imported but never used by
# the script below; nothing here writes to Excel yet.
import xlwt
from xlrd import open_workbook
from xlutils.copy import copy
# wordList maps each segmented token to the number of times it has been seen.
wordList={}
# key_list is initialised here but not used anywhere in the visible script.
key_list = []

# NOTE(review): the indentation of the loop bodies below was lost in this copy
# (most body lines sit at column 0); the nesting must be restored before the
# script can run.
# This initial value is overwritten immediately by the loop variable below.
x=1
# Fetch two result pages: x takes 0 and 1, and the URL uses x+1 (pages 1 and 2).
for x in range(2):
urlstr='http://yyk.qqyy.com/search_dp0lmhks0i'+str(x+1)+'.html'
# Send a browser-like User-Agent header with the request.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'
}
req = urllib2.Request(
url=urlstr,
headers=headers
)
myResponse = urllib2.urlopen(req)
# The response body is decoded as UTF-8 before pattern matching.
html = myResponse.read().decode('utf-8')
# NOTE(review): the pattern literal below appears to have lost its HTML tags
# when this page was extracted; as written it is a single-quoted string that
# spans several lines, which is not valid Python. Recover the original pattern
# before running. re.S makes '.' match newlines as well.
pattern = re.compile(
'

.*?

.*?.*?.*?

.*?

\r\n.*?(.*?)\r\n*?(.*?)\r\n.*?(.*?)

.*?

.*?(.*?)

.*?

.*?(.*?)

.*?
', re.S
)
# With several capture groups, findall returns one tuple of groups per match.
items = re.findall(pattern, html)
# for x in items:
# print x[0],x[1],x[2],x[3],x[4],x[5]
# Only the first captured group of each match (xdetail[0]) is processed.
for xdetail in items:
# print xdetail[0], xdetail[1], xdetail[2], xdetail[3], xdetail[4], xdetail[5]
# re.subn returns (new_string, number_of_substitutions), so the cleaned text
# is xdetailtext[0].
xdetailtext = re.subn(u' |\(|\)|(|)|<.*?>|()|</.*?>', "", xdetail[0])
# Use a regular expression to strip out the unwanted characters
# print xdetailtext[0]
    # Segment the cleaned text; cut_all=True selects jieba's full mode.
    seg_list = jieba.cut(xdetailtext[0], cut_all=True)
    # Tally every token produced by the segmenter into wordList.
    for word1 in seg_list:
        if wordList.has_key(word1):
            wordList[word1] += 1
        else:
            wordList[word1] = 1
    # print  wordList

# Print each token and its count as "token:count", one per line.
for wordkey in wordList.keys():
print wordkey + ":" + str(wordList[wordkey])

    这个是我的分词程序,就是不会把分词结果导入到excel中啊

1条回答 默认 最新

相关推荐 更多相似问题