我在执行下面程序的时候,表突然值为空了,也无法正常操作了,想请教一下是什么原因
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
df = pd.read_csv(r"C:/Users/szp/Desktop/assignment1/MRV_2019.csv")#import file, the path of file may need to be change in differnt computer
df
#统计值为空的行
num = df.isna().sum()
num
#删除表中有'Division by zero!','Not Applicable'的行
df = df.replace(['Division by zero!','Not Applicable'],np.NaN)
df = df.dropna(how="any")
df
#check the data type
df.info()
#change data type
df[['Total fuel consumption [m tonnes]','Total CO2 emissions [m tonnes]','Annual Total time spent at sea [hours]', 'Annual average Fuel consumption per distance [kg / n mile]', 'Annual average CO2 emissions per distance [kg CO2 / n mile]']] = df[['Total fuel consumption [m tonnes]','Total CO2 emissions [m tonnes]','Annual Total time spent at sea [hours]', 'Annual average Fuel consumption per distance [kg / n mile]', 'Annual average CO2 emissions per distance [kg CO2 / n mile]']].astype('float64')
#check the data type again
df.info()
#把'Port of Registry'这列的内容全改成大写
df['Port of Registry'] = df['Port of Registry'].apply(lambda x: x.upper())
df
#将'Technical efficiency [gCO2/t·nm]'中的值分成两列,一列只有EEDI或EVI,另一列只有数值,并删除'Technical efficiency [gCO2/t·nm]'列
eff = df['Technical efficiency [gCO2/t·nm]'].str.split('(',expand=True)
eff.columns = ['Technical Efficiency Index','Technical Efficiency Value']
eff['Technical Efficiency Value'] = eff['Technical Efficiency Value'].str.rstrip(')')
df = pd.concat([df,eff], axis=1)
df = df.drop('Technical efficiency [gCO2/t·nm]', axis=1)
df
#move 'Technical Efficiency Index', 'Technical Efficiency Value' to other place
title = df.columns.tolist()
title.insert(3, title.pop(title.index('Technical Efficiency Index')))
title.insert(4, title.pop(title.index('Technical Efficiency Value')))
df = df[title]
#delete the empty unit in technical efficiency value
df = df.dropna(axis=0, how='any')
df
#调用sqlite
import sqlite3
#建立与表mrv2019的连接
connection = sqlite3.connect('mrv2019.db')
#Creating a cursor object using the cursor() method
cursor = connection.cursor()
#Doping mrv2019 table if already exists.
cursor.execute("DROP TABLE IF EXISTS mrv2019")
#Creating table as per requirement
sql = '''CREATE TABLE mrv2019(
IMO PRIMARY KEY,
Name TEXT,
Type INTEGER,
TechnicEfficiencyIndex VARCHAR,
TechnicalEfficiencyValue NUMERIC,
PortofRegistry VARCHAR,
DoCissue TEXT,
DoCexpiry TEXT,
VerifierName VARCHAR,
VerifierCountry VARCHAR,
TotalFuel NUMERIC,
TotalCO2 NUMERIC,
TimeatSea NUMERIC,
FuelperMile NUMERIC,
CO2perMile NUMERIC
);'''
cursor.execute(sql)
# Commit the changes in the database
connection.commit()
#向新建的表填充数据
cursor = connection.cursor()
for index, row in df.iterrows():
cursor.execute("INSERT INTO mrv2019 VALUES (? ,? ,? ,? ,? ,? ,? ,? ,? ,? ,? ,? ,? ,?, ?)", (row['IMO Number'], row['Name'], row['Ship type'], row['Technical Efficiency Index'], row['Technical Efficiency Value'], row['Port of Registry'], row['DoC issue date'], row['DoC expiry date'], row['Verifier Name'], row['Verifier Country'], row['Total fuel consumption [m tonnes]'], row['Total CO2 emissions [m tonnes]'], row['Annual Total time spent at sea [hours]'], row['Annual average Fuel consumption per distance [kg / n mile]'], row['Annual average CO2 emissions per distance [kg CO2 / n mile]']
))
#对PortofRegistry列进行统计计算,得出不同港出现的次数,并把排名前21的港显示出来,画出bar图
sql = pd.read_sql("SELECT (PortofRegistry) AS name, COUNT(*) FROM mrv2019 Limit21", connection)
sql
#新建两个以IMO number为索引的表,一个表只含有Technical Efficiency Index为EEDI的船舶信息,另一个表只含有Technical Efficiency Index为EVI的船舶信息
df_eedi = df[df["Technical Efficiency Index"] == "EEDI"]
df_eedi.set_index('IMO Number')
df_evi = df[df["Technical Efficiency Index"] == "EIV"]
df_evi.set_index('IMO Number')