CCF | Python HDFS 编程

示例程序
from hdfs import InsecureClient
from collections import defaultdict
# Base URL handed to InsecureClient as ``url`` in createcli()
# (presumably the NameNode WebHDFS endpoint - confirm for your cluster).
hdurl = "http://172.16.115.212:50070"
# User name handed to InsecureClient as ``user`` in createcli().
hduser = "root"
def createcli() -> InsecureClient:
    """Build the HDFS client used by the rest of the script.

    The connection settings come from the module-level ``hdurl`` and
    ``hduser`` values.

    Returns:
        A new ``InsecureClient`` constructed with ``url=hdurl`` and
        ``user=hduser``.
    """
    client = InsecureClient(url=hdurl, user=hduser)
    return client
def deleteFile(cli: InsecureClient, path: str = "/result/mr_alldata.csv"):
    """Delete one path from HDFS.

    Args:
        cli: HDFS client used to issue the delete.
        path: HDFS path to remove. Defaults to the result file this function
            has always removed, so existing ``deleteFile(cli)`` calls behave
            as before.

    Returns:
        The value returned by ``cli.delete(path)``, passed through so callers
        can inspect the outcome if they want to.
    """
    return cli.delete(path)
def mapper(line):
    """Map one CSV record to a ``(host, 1)`` pair for counting.

    The URL is taken from the last comma-separated field of ``line``. The
    host is the text between the first ``http://`` / ``https://`` marker in
    that field and the next ``/``.

    Args:
        line: One raw line of the input CSV (surrounding whitespace and the
            trailing newline are stripped).

    Returns:
        ``(host, 1)`` when the last field contains a URL. ``False`` for the
        header row (last field equal to ``"url"``) and for records whose
        last field has no ``http://`` or ``https://`` marker, blank lines
        included, so callers can skip them with a truthiness check.
    """
    field = line.strip().split(",")[-1]
    if field == "url":
        return False

    # Pick whichever scheme marker appears first, so a second URL embedded
    # later in the field (e.g. inside a query string) cannot replace the
    # real host.
    start = -1
    host_begin = -1
    for scheme in ("https://", "http://"):
        pos = field.find(scheme)
        if pos != -1 and (start == -1 or pos < start):
            start = pos
            host_begin = pos + len(scheme)

    if start == -1:
        # No URL in this record: report "nothing to count" instead of
        # raising IndexError on the missing split result.
        return False

    host = field[host_begin:].split("/")[0]
    return (host, 1)
def reducer(cli: InsecureClient,
            input_path: str = "/logdata/alldata.csv",
            output_path: str = "/result/mr_alldata.csv"):
    """Count hosts in the log CSV, store the ranking in HDFS and preview it.

    Every line of ``input_path`` is passed to ``mapper``; the ``(host, 1)``
    pairs it yields are summed per host and ordered by count, highest first.
    The ranking is written to ``output_path`` as one ``(host,count)`` line
    per host, and the ten most frequent entries are printed.

    Args:
        cli: HDFS client used to read the input and write the result.
        input_path: HDFS path of the CSV to read. Defaults to the path this
            function has always read.
        output_path: HDFS path that receives the ranking. Defaults to the
            result file that ``deleteFile`` clears.

    Returns:
        The ranking as a list of ``(host, count)`` tuples, highest count
        first.
    """
    res = defaultdict(int)
    with cli.read(input_path, encoding="utf-8") as reader:
        for line in reader.readlines():
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # record, so skip them before they reach the mapper.
            if not line.strip():
                continue
            result = mapper(line)
            if result:
                domain, count = result
                res[domain] += count

    sorts = sorted(res.items(), key=lambda item: item[1], reverse=True)

    # Build the report in one pass; the original assembled this text but
    # never stored it anywhere.
    output = "".join(f"({domain},{counts})\n" for domain, counts in sorts)
    # overwrite=True keeps the call working even when the old result file
    # was not removed beforehand.
    cli.write(output_path, data=output, overwrite=True, encoding="utf-8")

    print("testing results:")
    for entry in sorts[:10]:
        print(entry)
    return sorts
if __name__ == "__main__":
    # Script entry point: connect to HDFS, remove the previous result file,
    # then run the host count over the log data.
    cli = createcli()
    deleteFile(cli)
    reducer(cli)
本文是原创文章,采用 CC BY-NC-ND 4.0 协议,完整转载请注明来自 Summer
评论
匿名评论
隐私政策
你无需删除空行,直接评论以获取最佳展示效果