目录
1. Python调用HDFS的API
- 1. Python调用HDFS的API
- 安装依赖包
[root@bigdata001 ~]#
[root@bigdata001 ~]# pip3 install hdfs
[root@bigdata001 ~]#
- 删除HDFS目录下的所有文件或文件夹
from hdfs import InsecureClient
# Script entry point: remove every entry directly under an HDFS directory
# while leaving the directory itself in place.
if __name__ == '__main__':
    # The hdfs package accepts several WebHDFS URLs separated by semicolons
    # for High Availability setups; one URL per NameNode is listed here.
    hdfs_url = "http://192.168.8.111:9870;http://192.168.8.112:9870;http://192.168.8.113:9870"
    insecure_client = InsecureClient(url=hdfs_url, user='root', root='/')

    delete_path = "/test_path"

    # list() returns the names of the entries inside delete_path (not full
    # paths), so each name is joined back onto the parent before deletion.
    child_paths = insecure_client.list(delete_path)
    for child_path in child_paths:
        insecure_client.delete(
            hdfs_path=delete_path + "/" + child_path,
            # recursive=True so that non-empty sub-directories are removed too.
            recursive=True,
            # skip_trash=True deletes for good instead of moving to the trash.
            skip_trash=True,
        )
- 下载HDFS上的文件到本地
# Copy a single HDFS file to the local filesystem.
# `insecure_client` is the InsecureClient instance created in the deletion
# example; the name must match that variable exactly.
insecure_client.download(
    hdfs_path='/test_path/part-00000',
    local_path='/opt/test_path',
    # Overwrite whatever already exists at the local target.
    overwrite=True,
    # A single file needs only one download thread.
    n_threads=1,
)