import pandas as pd
import numpy as np
file_path=input('请复制文件路径,并在对话框首位加一个r,不然会报错')
n=input('请输入子表sheet的实际排序位置')
def open_file(path,n):
path=str(path)
n=int(n)
matched_data=pd.read_excel(path,sheet_name=n-1)
## 标记+合并 并逐段地拆分
matched_data['粗分']=None
matched_data['粗分']=matched_data['原因'].str.contains('分拣|自身')
matched_data['粗分']=matched_data['粗分'].apply(lambda x:'自身原因' if x==True else 1)
print('这是未拆分的',matched_data['粗分'].value_counts())
matched_data0=matched_data[matched_data['粗分']=='自身原因']#自身原因数据
matched_data1=matched_data[matched_data['粗分']==1]#筛选出剩下的数据
matched_data1['粗分']=matched_data1['原因'].str.contains('上游|仓')
matched_data1['粗分']=matched_data1['粗分'].apply(lambda x:'上游原因' if x==True else 2)#匹配剩下的数据
print('这是拆掉自身原因后的数据',matched_data1['粗分'].value_counts())
matched_data00=matched_data1[matched_data1['粗分']=='上游原因']
matched_data0=pd.concat([matched_data0,matched_data00],axis=0,ignore_index=True)#上游+自身数据
matched_data1=matched_data1[matched_data1['粗分']==2]#筛选出剩下的数据
matched_data1['粗分']=matched_data1['原因'].str.contains('站点|终端')
matched_data1['粗分']=matched_data1['粗分'].apply(lambda x:'站点原因' if x==True else 3)
print('这是拆掉站点原因后的数据',matched_data1['粗分'].value_counts())
matched_data000=matched_data1[matched_data1['粗分']=='站点原因']
matched_data0=pd.concat([matched_data0,matched_data000],axis=0,ignore_index=True)#上游+自身+站点
matched_data1=matched_data1[matched_data1['粗分']==3]#筛选出剩下的数据
matched_data1['粗分']=matched_data1['原因'].str.contains('下游')
matched_data1['粗分']=matched_data1['粗分'].apply(lambda x:'下游原因' if x==True else 4)
print('这是拆掉下游原因后的数据',matched_data1['粗分'].value_counts())
matched_data0000=matched_data1[matched_data1['粗分']=='下游原因']
matched_data0=pd.concat([matched_data0,matched_data0000],axis=0,ignore_index=True)#上游+自身+站点+下游
matched_data1=matched_data1[matched_data1['粗分']==4]#筛选出剩下的数据
matched_data1['粗分']=matched_data1['粗分'].map({4:'其他原因'})
print('这是拆掉其他原因后的数据',matched_data1['粗分'].value_counts())
matched_data00000=matched_data1[matched_data1['粗分']=='其他原因']
matched_data0=pd.concat([matched_data0,matched_data00000],axis=0,ignore_index=True)
matched_data0['粗分'].value_counts()
matched_data0.to_excel("C:/Users/沈仲藩/Desktop/履约python匹/已区好分原因.xlsx",index=None)
open_file(file_path,n)
以上是源代码
运行结果及报错内容
```python
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
<ipython-input-6-eb86fcbb83c1> in <module>
44 matched_data0['粗分'].value_counts()
45 matched_data0.to_excel("C:/Users/沈仲藩/Desktop/履约python匹/已区好分原因.xlsx",index=None)
---> 46 open_file(file_path,n)
<ipython-input-6-eb86fcbb83c1> in open_file(path, n)
3 path=str(path)
4 n=int(n)
----> 5 matched_data=pd.read_excel(path,sheet_name=n-1)
6 ## 标记+合并 并逐段地拆分
7 matched_data['粗分']=None
~\AppData\Roaming\Python\Python37\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs)
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
312
313 return wrapper
~\AppData\Roaming\Python\Python37\site-packages\pandas\io\excel\_base.py in read_excel(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, parse_dates, date_parser, thousands, comment, skipfooter, convert_float, mangle_dupe_cols, storage_options)
362 if not isinstance(io, ExcelFile):
363 should_close = True
--> 364 io = ExcelFile(io, storage_options=storage_options, engine=engine)
365 elif engine and engine != io.engine:
366 raise ValueError(
~\AppData\Roaming\Python\Python37\site-packages\pandas\io\excel\_base.py in __init__(self, path_or_buffer, engine, storage_options)
1190 else:
1191 ext = inspect_excel_format(
-> 1192 content_or_path=path_or_buffer, storage_options=storage_options
1193 )
1194 if ext is None:
~\AppData\Roaming\Python\Python37\site-packages\pandas\io\excel\_base.py in inspect_excel_format(content_or_path, storage_options)
1069
1070 with get_handle(
-> 1071 content_or_path, "rb", storage_options=storage_options, is_text=False
1072 ) as handle:
1073 stream = handle.handle
~\AppData\Roaming\Python\Python37\site-packages\pandas\io\common.py in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
709 else:
710 # Binary mode
--> 711 handle = open(handle, ioargs.mode)
712 handles.append(handle)
713
OSError: [Errno 22] Invalid argument: 'rC:\\Users\\沈仲藩\\Desktop\\履约python匹\\已匹配.xlsx'