import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import csv
import fileinput
import time
pd.options.display.max_columns = None

# Raw string literal: in a plain literal the "\U" of "\Users" starts a unicode
# escape (the file fails to parse) and "\1001" would be read as an octal escape.
DATA_PATH = r'C:\Users\丹心傲雪\Desktop\毕业论文冲鸭\1001-CD\1001-CD.txt'  # The path of data file
OUTPUT_PATH = 'data.csv'

# Column labels assigned to the raw file, and the layout of the summary table.
INPUT_COLUMNS = ['carid', 'orderid', 'time', 'longitude', 'latitude']
OUTPUT_COLUMNS = ['carid', 'orderid', 'starttime', 'endtime', 'longitude', 'latitude']


def summarize_orders(data):
    """Collapse per-point records into one summary row per order.

    For every distinct ``orderid`` the result holds the ``carid``,
    ``longitude`` and ``latitude`` of the order's earliest record (by
    ``time``), that record's time as ``starttime``, and the time of the
    order's latest record as ``endtime``.

    Args:
        data: DataFrame with the columns listed in ``INPUT_COLUMNS``.

    Returns:
        A new DataFrame with the columns in ``OUTPUT_COLUMNS`` and one row per
        order, in the order in which the order ids first appear in ``data``.
        ``data`` itself is not modified.
    """
    if data.empty:
        return pd.DataFrame(columns=OUTPUT_COLUMNS)

    # One stable sort for the whole table instead of filtering and sorting the
    # table once per order id; equal timestamps keep their original order.
    by_time = data.sort_values('time', kind='stable')
    first_rows = by_time.drop_duplicates('orderid', keep='first').set_index('orderid')
    last_rows = by_time.drop_duplicates('orderid', keep='last').set_index('orderid')

    # Emit orders in the order they first show up in the input.
    order_ids = data['orderid'].drop_duplicates().to_numpy()
    first_rows = first_rows.loc[order_ids]
    last_rows = last_rows.loc[order_ids]

    # Columns are mapped by name. A positional mapping of the 5 input columns
    # onto the 6 output names shifts the coordinates one slot to the left and
    # leaves 'latitude' empty.
    return pd.DataFrame(
        {
            'carid': first_rows['carid'].to_numpy(),
            'orderid': order_ids,
            'starttime': first_rows['time'].to_numpy(),
            'endtime': last_rows['time'].to_numpy(),
            'longitude': first_rows['longitude'].to_numpy(),
            'latitude': first_rows['latitude'].to_numpy(),
        },
        columns=OUTPUT_COLUMNS,
    )


def main():
    """Read the raw file, write the per-order summary to CSV, print the runtime."""
    start = time.time()
    # NOTE(review): read_csv takes the first line of the file as a header, and
    # the labels are then overwritten below. If the file has no header row,
    # its first record is lost; pass header=None, names=INPUT_COLUMNS instead.
    # TODO confirm against the data file.
    data = pd.read_csv(DATA_PATH)
    data.columns = INPUT_COLUMNS
    summarize_orders(data).to_csv(OUTPUT_PATH, index=False)
    end = time.time()
    print("运行时间:%.2f秒" % (end - start))


if __name__ == "__main__":
    main()