互动

大数据 | 数据清洗（pd第五套）

Summer 字数: 942 阅读耗时: 2 分钟 2025/11/04 2025/11/05 博客独享热度: 19 评论: 0

本文最后更新于 2025-11-05，文章内容可能已经过时。

import pandas as pd
import numpy as np
import os
# Task 1: count the null cells in every column of src/distribution.csv
# (relative to the current working directory) and export the totals.
p = os.getcwd()
print(p)

src_file = f"{p}/src/distribution.csv"
srcdf = pd.read_csv(src_file)

# Series indexed by column name; each value is that column's null count.
sums = srcdf.isna().sum()

# Move the column names out of the index so the result has two plain
# columns: the source column name and its null count.
df2 = sums.rename_axis('Column').reset_index(name='Null_Count')
print(df2)

df2.to_csv(f"{p}/src/result_1.csv", index=False)

import pandas as pd
import numpy as np

# Task 2 input: an Excel workbook looked up relative to the working directory.
src_file = "鞍山.xlsx"
# The code below reads the columns weekday, hightest_tem, lowest_tem and
# weather from this frame.
# NOTE(review): presumably one row per calendar day — confirm against the workbook.
src_df = pd.read_excel(src_file)

def judging(day, hitmp, lowtmp, weather):
    """Decide whether a day is suitable for an outing.

    A day qualifies only when it falls on a weekend ("星期六" or "星期日"),
    its weather text does not mention rain ("雨"), the high temperature is
    at most 30 and the low temperature is at least 18.

    Args:
        day: Weekday label of the record.
        hitmp: Daily high temperature; a number or a numeric string.
        lowtmp: Daily low temperature; a number or a numeric string.
        weather: Weather description text.

    Returns:
        "是" when every condition holds, otherwise "否". Records with a
        missing temperature (None / NaN) or a missing weather description
        yield "否" because suitability cannot be confirmed.

    Raises:
        ValueError: If a temperature is text that cannot be parsed as a
            number.
    """
    try:
        # Compare as floats: int() would truncate 30.6 to 30 and let a day
        # that is too hot slip through the "<= 30" check.
        high = float(hitmp)
        low = float(lowtmp)
    except TypeError:
        # None (or another non-numeric object) means the reading is missing.
        return "否"

    if day not in ("星期六", "星期日"):
        return "否"

    # A missing weather cell arrives from pandas as NaN (a float), which
    # cannot be searched for the rain character.
    if not isinstance(weather, str) or "雨" in weather:
        return "否"

    # NaN temperatures fail both comparisons and therefore fall to "否".
    if high <= 30 and low >= 18:
        return "是"
    return "否"

# Columns handed to judging(), in the order of its positional parameters:
# day, hitmp, lowtmp, weather.
judging_inputs = ("weekday", "hightest_tem", "lowest_tem", "weather")

# Evaluate every row and store the verdict in a new column.
src_df["是否适合出行游玩"] = src_df.apply(
    lambda row: judging(*(row[column] for column in judging_inputs)),
    axis=1,
)

# Write the labelled table to a new workbook without the index column.
src_df.to_excel("taged_data.xlsx", index=False)