大数据 | 数据清洗(pd第五套)
import pandas as pd
import numpy as np
import os
# Script 1: count the missing values in every column of distribution.csv
# and save the per-column counts as result_1.csv.

# All paths are built from the current working directory.
p = os.getcwd()
print(p)

src_file = f"{p}/src/distribution.csv"
srcdf = pd.read_csv(src_file)

# isnull() marks missing cells; sum() then counts them per column, giving a
# Series indexed by column name.
sums = srcdf.isnull().sum()

# Reshape the Series into a two-column table: column name / null count.
df2 = pd.DataFrame(
    {
        'Column': sums.index,
        'Null_Count': sums.values
    }
)
print(df2)

# index=False keeps the positional row index out of the output file.
df2.to_csv(f"{p}/src/result_1.csv", index=False)

# Import for the second, independent snippet that follows.
import pandas as pd
import numpy as np
# Input workbook; the relative path is resolved against the current working
# directory.
src_file = "鞍山.xlsx"
# Load the workbook into a DataFrame for the row-wise tagging below.
src_df = pd.read_excel(src_file)
def judging(day, hitmp, lowtmp, weather):
    """Decide whether a day is suitable for an outing.

    A day qualifies only when every condition holds:

    * ``day`` is "星期六" (Saturday) or "星期日" (Sunday),
    * ``weather`` does not contain the character "雨" (rain),
    * the high temperature is at most 30,
    * the low temperature is at least 18.

    Args:
        day: Weekday label of the row.
        hitmp: Daily high temperature; any value accepted by ``int()``.
        lowtmp: Daily low temperature; any value accepted by ``int()``.
        weather: Weather description text.

    Returns:
        "是" (yes) when all conditions hold, otherwise "否" (no).

    Raises:
        ValueError, TypeError: Propagated from ``int()`` when a temperature
            cannot be converted.
    """
    # Convert before any other check so a malformed temperature fails on
    # every row, not only on weekend rows.
    hitmp = int(hitmp)
    lowtmp = int(lowtmp)

    # `and` short-circuits left to right, so the weather text is inspected
    # only for weekend rows and the temperatures only for rain-free ones.
    if (
        day in ("星期六", "星期日")
        and "雨" not in weather
        and hitmp <= 30
        and lowtmp >= 18
    ):
        return "是"
    return "否"
def _tag_row(row):
    """Run ``judging`` on one row's weekday, high/low temperature and weather."""
    # Column names are reproduced exactly as the original script spells them
    # ("hightest_tem" included) -- presumably matching the workbook headers.
    return judging(
        row["weekday"],
        row["hightest_tem"],
        row["lowest_tem"],
        row["weather"],
    )


# axis=1 hands each row to the helper; the resulting labels become a new
# column on the frame.
suitability = src_df.apply(_tag_row, axis=1)
src_df["是否适合出行游玩"] = suitability

# Write the tagged table without the positional row index.
src_df.to_excel("taged_data.xlsx", index=False)
本文是原创文章,采用 CC BY-NC-ND 4.0 协议,完整转载请注明来自 Summer
评论
匿名评论
隐私政策
你无需删除空行,直接评论以获取最佳展示效果