path ='.../data0ape'
def get_filelist(dir):
Filelist = []
for home, dirs, files in os.walk(path):
for filename in files:
# 文件名列表,包含完整路径
Filelist.append(os.path.join(home, filename))
# # 文件名列表,只包含文件名
# Filelist.append( filename)
return Filelist
Filelist = get_filelist(dir)
print(len( Filelist))
for file in Filelist :
#print(file)
txtfile = open(file, "r")
lineStr1 = txtfile.readline()
word = lineStr1[13:].split(" ")[0]
#df
files = pd.read_csv(file,sep='\s+',header = None, skiprows= 8,keep_default_na=False)
data = pd.DataFrame(files)
data['time'] = data[1].apply(lambda x:x[0:8])
data_new = data
data_new[1] = data_new['time']
data_new = data_new.groupby(by = 'time').mean()
data_new = data_new.reset_index(drop=False)
df1 = data_new.iloc[:,:2 ]
dftest = pd.DataFrame({'time': pd.date_range(start= df1['time'][0], end=df1['time'].iloc[-1], freq='S')})
df1['time'] = df1['time'].astype('str')
start = df1['time'][0]
end = df1['time'][(len(df1) - 1)]
time = pd.date_range(start=start, end=end, freq='S')
str1 = time.strftime("%Y-%m-%d %H:%M:%S").to_list()
time2 = [x.split(' ')[1] for x in str1]
df2 = pd.DataFrame({'time': time2})
df3 = pd.merge(df1, df2, on='time', how="right")
df4 = df3.drop('time', axis = 1)
#add id
df4.loc[-1] = word
df4.index = df4.index + 1
df4 = df4.sort_index()
本人想使用上述代码块实现————从某文件夹子文件夹的.txt提取要素作为数据文件,在运行时“lineStr1 = txtfile.readline()”报错,'utf-8' codec can't decode byte 0x88 in position 296: invalid start byte,修改‘r’为‘encoding='ISO-8859-1'后,files = pd.read_csv(file,sep='\s+',header = None, skiprows= 8,keep_default_na=False)又报错Error tokenizing data. C error: Expected 3 fields in line 10, saw 9。源代码在单步调试时是可以运行的,最后我想实现所有从file提取的df整合到一个df文件里面。