#特例 文本-编码联合衍生特征构造
#取第一条记录
row1 = wenbens[0]
print(row1)
#提取商品编码
filename = 'C:\Users\asus\Desktop\shiyan.csv'
with open(filename) as f:
reader = csv.reader(f)
header_row = next(reader)
codes = []
for row in reader:
codes.append(row[18])
code = codes[0]
print(code)
seg_lists = jieba.cut(str(row1))
output = ' '.join(seg_lists)
print("Default Mode:",output)
for word in output:
text_code = word+'-'+code
print(text_code)
运行出来的结果为:
['棉毯', '毯子|100%棉|无其他']
6301300000
Default Mode: [ ' 棉毯 ' , ' 毯子 | 100% 棉 | 无 其他 ' ]
[-6301300000
-6301300000
'-6301300000
-6301300000
棉-6301300000
毯-6301300000
-6301300000
'-6301300000
-6301300000
,-6301300000
-6301300000
-6301300000
-6301300000
'-6301300000
-6301300000
毯-6301300000
子-6301300000
-6301300000
|-6301300000
-6301300000
1-6301300000
0-6301300000
0-6301300000
%-6301300000
-6301300000
棉-6301300000
-6301300000
|-6301300000
-6301300000
无-6301300000
-6301300000
其-6301300000
他-6301300000
-6301300000
'-6301300000
-6301300000
]-6301300000
而我想要的是:
棉毯-6301300000
毯子-6301300000
100%-6301300000
棉-6301300000
无-6301300000
其他-6301300000