2020-04-29 13:24:29 +02:00
|
|
|
|
2020-04-04 14:03:43 +02:00
|
|
|
package eu.dnetlib.dhp.common;
|
|
|
|
|
2020-04-18 12:42:58 +02:00
|
|
|
import static eu.dnetlib.dhp.common.ThrowingSupport.rethrowAsRuntimeException;
|
|
|
|
|
|
|
|
import java.util.Arrays;
|
|
|
|
import java.util.List;
|
|
|
|
import java.util.stream.Collectors;
|
2020-04-29 13:24:29 +02:00
|
|
|
|
2020-04-04 14:03:43 +02:00
|
|
|
import org.apache.hadoop.conf.Configuration;
|
|
|
|
import org.apache.hadoop.fs.FileStatus;
|
|
|
|
import org.apache.hadoop.fs.FileSystem;
|
|
|
|
import org.apache.hadoop.fs.Path;
|
|
|
|
import org.slf4j.Logger;
|
|
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
|
2020-04-18 12:42:58 +02:00
|
|
|
/** HDFS utility methods. */
|
2020-04-04 14:03:43 +02:00
|
|
|
public class HdfsSupport {
|
2020-04-29 13:24:29 +02:00
|
|
|
private static final Logger logger = LoggerFactory.getLogger(HdfsSupport.class);
|
2020-04-04 14:03:43 +02:00
|
|
|
|
2020-04-29 13:24:29 +02:00
|
|
|
private HdfsSupport() {
|
|
|
|
}
|
2020-04-04 14:03:43 +02:00
|
|
|
|
2020-04-29 13:24:29 +02:00
|
|
|
/**
|
|
|
|
* Checks a path (file or dir) exists on HDFS.
|
|
|
|
*
|
|
|
|
* @param path Path to be checked
|
|
|
|
* @param configuration Configuration of hadoop env
|
|
|
|
*/
|
|
|
|
public static boolean exists(String path, Configuration configuration) {
|
|
|
|
logger.info("Removing path: {}", path);
|
|
|
|
return rethrowAsRuntimeException(
|
|
|
|
() -> {
|
|
|
|
Path f = new Path(path);
|
|
|
|
FileSystem fileSystem = FileSystem.get(configuration);
|
|
|
|
return fileSystem.exists(f);
|
|
|
|
});
|
|
|
|
}
|
2020-04-17 13:12:44 +02:00
|
|
|
|
2020-04-29 13:24:29 +02:00
|
|
|
/**
|
|
|
|
* Removes a path (file or dir) from HDFS.
|
|
|
|
*
|
|
|
|
* @param path Path to be removed
|
|
|
|
* @param configuration Configuration of hadoop env
|
|
|
|
*/
|
|
|
|
public static void remove(String path, Configuration configuration) {
|
|
|
|
logger.info("Removing path: {}", path);
|
|
|
|
rethrowAsRuntimeException(
|
|
|
|
() -> {
|
|
|
|
Path f = new Path(path);
|
|
|
|
FileSystem fileSystem = FileSystem.get(configuration);
|
|
|
|
if (fileSystem.exists(f)) {
|
|
|
|
fileSystem.delete(f, true);
|
|
|
|
}
|
|
|
|
});
|
|
|
|
}
|
2020-04-04 14:03:43 +02:00
|
|
|
|
2020-04-29 13:24:29 +02:00
|
|
|
/**
|
|
|
|
* Lists hadoop files located below path or alternatively lists subdirs under path.
|
|
|
|
*
|
|
|
|
* @param path Path to be listed for hadoop files
|
|
|
|
* @param configuration Configuration of hadoop env
|
|
|
|
* @return List with string locations of hadoop files
|
|
|
|
*/
|
|
|
|
public static List<String> listFiles(String path, Configuration configuration) {
|
|
|
|
logger.info("Listing files in path: {}", path);
|
|
|
|
return rethrowAsRuntimeException(
|
|
|
|
() -> Arrays
|
|
|
|
.stream(FileSystem.get(configuration).listStatus(new Path(path)))
|
|
|
|
.filter(FileStatus::isDirectory)
|
|
|
|
.map(x -> x.getPath().toString())
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
}
|
2020-04-04 14:03:43 +02:00
|
|
|
}
|