1、连接HDFS:
package api;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
/**
 * Connection helper for the HDFS cluster used by this package.
 */
public class Utils {
    /**
     * Opens a {@link FileSystem} handle for the cluster at {@code hdfs://potter2:9000}.
     *
     * <p>As a side effect this sets the JVM-wide system property
     * {@code HADOOP_USER_NAME} to {@code potter}; presumably the Hadoop client
     * reads it to decide which user the requests are issued as.
     *
     * @return the file system obtained from {@link FileSystem#get(Configuration)}
     * @throws Exception if the file system cannot be obtained
     */
    public static FileSystem HDFS() throws Exception {
        Configuration clusterConf = new Configuration();
        // Point the client at the NameNode instead of the local file system.
        clusterConf.set("fs.defaultFS", "hdfs://potter2:9000");
        // Must be in place before the file system is requested below.
        System.setProperty("HADOOP_USER_NAME", "potter");
        return FileSystem.get(clusterConf);
    }
}
2、主程序代码:
package api;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.junit.Test;
/**
 * Deletes all empty files and empty directories in the HDFS cluster.
 *
 * @author Administrator
 */
public class Empty {

    /** Runs the cleanup over the whole cluster, starting from the root directory. */
    @Test
    public void tt() throws Exception {
        Empty1(new Path("/"));
    }

    /**
     * Removes every zero-length file and every empty directory at or below {@code path}.
     *
     * <p>Directories that become empty because their contents were removed are removed
     * as well, including {@code path} itself. The root directory is never deleted, and
     * nothing outside {@code path} is touched.
     *
     * @param path file or directory to clean up
     * @throws Exception if the cluster cannot be reached, {@code path} does not exist,
     *                   or a file system operation fails
     */
    public static void Empty1(Path path) throws Exception {
        // Connect once and share the handle with the whole traversal instead of
        // building a new Configuration at every recursion level.
        FileSystem fs = Utils.HDFS();
        FileStatus status = fs.getFileStatus(path);
        if (status.isDirectory()) {
            pruneDirectory(fs, path);
        } else if (status.getLen() == 0) {
            // A plain empty file was passed in: remove just that file.
            fs.delete(path, false);
        }
    }

    /**
     * Depth-first cleanup of one directory: empties it of zero-length files and empty
     * sub-directories, then removes the directory itself if nothing is left in it.
     */
    private static void pruneDirectory(FileSystem fs, Path dir) throws Exception {
        // Work on a snapshot of the listing so deletions below cannot disturb the iteration.
        FileStatus[] children = fs.listStatus(dir);
        System.out.println(children.length+"********");

        for (FileStatus child : children) {
            Path currentPath = child.getPath();
            System.out.println(currentPath+"1111111");
            System.out.println(currentPath.getParent()+"2222222");

            if (child.isDirectory()) {
                // Handles both already-empty directories and ones that become empty.
                pruneDirectory(fs, currentPath);
            } else if (child.getLen() == 0) {
                fs.delete(currentPath, false);
            }
        }

        // The root has no parent; it must stay even when the cluster is empty.
        if (dir.getParent() == null) {
            return;
        }

        // Check emptiness once, after all children were processed. The delete is
        // non-recursive on purpose: if something was written into the directory in
        // the meantime, the call fails instead of silently destroying that data.
        if (fs.listStatus(dir).length == 0 && fs.delete(dir, false)) {
            System.out.println("删除成功xxxxx");
        }
    }
}
版权声明:本文为qq_41851454原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。