forked from antonis.lempesis/dnet-hadoop
[dhp-actionmanager] common package added with utility classes supporting hadoop and spark envs
This commit is contained in:
parent
ad70c23b2e
commit
f9f7350bb9
|
@ -0,0 +1,55 @@
|
||||||
|
package eu.dnetlib.dhp.actionmanager.common;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.function.Supplier;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provides serializable and throwing extensions to standard functional interfaces.
|
||||||
|
*/
|
||||||
|
public class FunctionalInterfaceSupport {
|
||||||
|
|
||||||
|
private FunctionalInterfaceSupport() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Serializable supplier of any kind of objects. To be used withing spark processing pipelines when supplying
|
||||||
|
* functions externally.
|
||||||
|
*
|
||||||
|
* @param <T>
|
||||||
|
*/
|
||||||
|
@FunctionalInterface
|
||||||
|
public interface SerializableSupplier<T> extends Supplier<T>, Serializable {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extension of consumer accepting functions throwing an exception.
|
||||||
|
*
|
||||||
|
* @param <T>
|
||||||
|
* @param <E>
|
||||||
|
*/
|
||||||
|
@FunctionalInterface
|
||||||
|
public interface ThrowingConsumer<T, E extends Exception> {
|
||||||
|
void accept(T t) throws E;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extension of supplier accepting functions throwing an exception.
|
||||||
|
*
|
||||||
|
* @param <T>
|
||||||
|
* @param <E>
|
||||||
|
*/
|
||||||
|
@FunctionalInterface
|
||||||
|
public interface ThrowingSupplier<T, E extends Exception> {
|
||||||
|
T get() throws E;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extension of runnable accepting functions throwing an exception.
|
||||||
|
*
|
||||||
|
* @param <E>
|
||||||
|
*/
|
||||||
|
@FunctionalInterface
|
||||||
|
public interface ThrowingRunnable<E extends Exception> {
|
||||||
|
void run() throws E;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,57 @@
|
||||||
|
package eu.dnetlib.dhp.actionmanager.common;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.actionmanager.common.ThrowingSupport.rethrowAsRuntimeException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* HDFS utility methods.
|
||||||
|
*/
|
||||||
|
public class HdfsSupport {
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(HdfsSupport.class);
|
||||||
|
|
||||||
|
private HdfsSupport() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Removes a path (file or dir) from HDFS.
|
||||||
|
*
|
||||||
|
* @param path Path to be removed
|
||||||
|
* @param configuration Configuration of hadoop env
|
||||||
|
*/
|
||||||
|
public static void remove(String path, Configuration configuration) {
|
||||||
|
logger.info("Removing path: {}", path);
|
||||||
|
rethrowAsRuntimeException(() -> {
|
||||||
|
Path f = new Path(path);
|
||||||
|
FileSystem fileSystem = FileSystem.get(configuration);
|
||||||
|
if (fileSystem.exists(f)) {
|
||||||
|
fileSystem.delete(f, true);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Lists hadoop files located below path or alternatively lists subdirs under path.
|
||||||
|
*
|
||||||
|
* @param path Path to be listed for hadoop files
|
||||||
|
* @param configuration Configuration of hadoop env
|
||||||
|
* @return List with string locations of hadoop files
|
||||||
|
*/
|
||||||
|
public static List<String> listFiles(String path, Configuration configuration) {
|
||||||
|
logger.info("Listing files in path: {}", path);
|
||||||
|
return rethrowAsRuntimeException(() -> Arrays
|
||||||
|
.stream(FileSystem.get(configuration).listStatus(new Path(path)))
|
||||||
|
.filter(FileStatus::isDirectory)
|
||||||
|
.map(x -> x.getPath().toString())
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,51 @@
|
||||||
|
package eu.dnetlib.dhp.actionmanager.common;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Inheritance utility methods.
|
||||||
|
*/
|
||||||
|
public class ModelSupport {
|
||||||
|
|
||||||
|
private ModelSupport() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks subclass-superclass relationship.
|
||||||
|
*
|
||||||
|
* @param subClazzObject Subclass object instance
|
||||||
|
* @param superClazzObject Superclass object instance
|
||||||
|
* @param <X> Subclass type
|
||||||
|
* @param <Y> Superclass type
|
||||||
|
* @return True if X is a subclass of Y
|
||||||
|
*/
|
||||||
|
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(X subClazzObject, Y superClazzObject) {
|
||||||
|
return isSubClass(subClazzObject.getClass(), superClazzObject.getClass());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks subclass-superclass relationship.
|
||||||
|
*
|
||||||
|
* @param subClazzObject Subclass object instance
|
||||||
|
* @param superClazz Superclass class
|
||||||
|
* @param <X> Subclass type
|
||||||
|
* @param <Y> Superclass type
|
||||||
|
* @return True if X is a subclass of Y
|
||||||
|
*/
|
||||||
|
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(X subClazzObject, Class<Y> superClazz) {
|
||||||
|
return isSubClass(subClazzObject.getClass(), superClazz);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks subclass-superclass relationship.
|
||||||
|
*
|
||||||
|
* @param subClazz Subclass class
|
||||||
|
* @param superClazz Superclass class
|
||||||
|
* @param <X> Subclass type
|
||||||
|
* @param <Y> Superclass type
|
||||||
|
* @return True if X is a subclass of Y
|
||||||
|
*/
|
||||||
|
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(Class<X> subClazz, Class<Y> superClazz) {
|
||||||
|
return superClazz.isAssignableFrom(subClazz);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,57 @@
|
||||||
|
package eu.dnetlib.dhp.actionmanager.common;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.actionmanager.common.FunctionalInterfaceSupport.ThrowingConsumer;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.function.Function;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* SparkSession utility methods.
|
||||||
|
*/
|
||||||
|
public class SparkSessionSupport {
|
||||||
|
|
||||||
|
private SparkSessionSupport() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Runs a given function using SparkSession created using default builder and supplied SparkConf. Stops SparkSession
|
||||||
|
* when SparkSession is managed. Allows to reuse SparkSession created externally.
|
||||||
|
*
|
||||||
|
* @param conf SparkConf instance
|
||||||
|
* @param isSparkSessionManaged When true will stop SparkSession
|
||||||
|
* @param fn Consumer to be applied to constructed SparkSession
|
||||||
|
*/
|
||||||
|
public static void runWithSparkSession(SparkConf conf,
|
||||||
|
Boolean isSparkSessionManaged,
|
||||||
|
ThrowingConsumer<SparkSession, Exception> fn) {
|
||||||
|
runWithSparkSession(c -> SparkSession.builder().config(c).getOrCreate(), conf, isSparkSessionManaged, fn);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Runs a given function using SparkSession created using supplied builder and supplied SparkConf. Stops SparkSession
|
||||||
|
* when SparkSession is managed. Allows to reuse SparkSession created externally.
|
||||||
|
*
|
||||||
|
* @param sparkSessionBuilder Builder of SparkSession
|
||||||
|
* @param conf SparkConf instance
|
||||||
|
* @param isSparkSessionManaged When true will stop SparkSession
|
||||||
|
* @param fn Consumer to be applied to constructed SparkSession
|
||||||
|
*/
|
||||||
|
public static void runWithSparkSession(Function<SparkConf, SparkSession> sparkSessionBuilder,
|
||||||
|
SparkConf conf,
|
||||||
|
Boolean isSparkSessionManaged,
|
||||||
|
ThrowingConsumer<SparkSession, Exception> fn) {
|
||||||
|
SparkSession spark = null;
|
||||||
|
try {
|
||||||
|
spark = sparkSessionBuilder.apply(conf);
|
||||||
|
fn.accept(spark);
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
} finally {
|
||||||
|
if (Objects.nonNull(spark) && isSparkSessionManaged) {
|
||||||
|
spark.stop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,76 @@
|
||||||
|
package eu.dnetlib.dhp.actionmanager.common;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.actionmanager.common.FunctionalInterfaceSupport.ThrowingRunnable;
|
||||||
|
import eu.dnetlib.dhp.actionmanager.common.FunctionalInterfaceSupport.ThrowingSupplier;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Exception handling utility methods.
|
||||||
|
*/
|
||||||
|
public class ThrowingSupport {
|
||||||
|
|
||||||
|
private ThrowingSupport() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Executes given runnable and rethrows any exceptions as RuntimeException.
|
||||||
|
*
|
||||||
|
* @param fn Runnable to be executed
|
||||||
|
* @param <E> Type of exception thrown
|
||||||
|
*/
|
||||||
|
public static <E extends Exception> void rethrowAsRuntimeException(ThrowingRunnable<E> fn) {
|
||||||
|
try {
|
||||||
|
fn.run();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Executes given runnable and rethrows any exceptions as RuntimeException with custom message.
|
||||||
|
*
|
||||||
|
* @param fn Runnable to be executed
|
||||||
|
* @param msg Message to be set for rethrown exception
|
||||||
|
* @param <E> Type of exception thrown
|
||||||
|
*/
|
||||||
|
public static <E extends Exception> void rethrowAsRuntimeException(ThrowingRunnable<E> fn, String msg) {
|
||||||
|
try {
|
||||||
|
fn.run();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException(msg, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Executes given supplier and rethrows any exceptions as RuntimeException.
|
||||||
|
*
|
||||||
|
* @param fn Supplier to be executed
|
||||||
|
* @param <T> Type of returned value
|
||||||
|
* @param <E> Type of exception thrown
|
||||||
|
* @return Result of supplier execution
|
||||||
|
*/
|
||||||
|
public static <T, E extends Exception> T rethrowAsRuntimeException(ThrowingSupplier<T, E> fn) {
|
||||||
|
try {
|
||||||
|
return fn.get();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Executes given supplier and rethrows any exceptions as RuntimeException with custom message.
|
||||||
|
*
|
||||||
|
* @param fn Supplier to be executed
|
||||||
|
* @param msg Message to be set for rethrown exception
|
||||||
|
* @param <T> Type of returned value
|
||||||
|
* @param <E> Type of exception thrown
|
||||||
|
* @return Result of supplier execution
|
||||||
|
*/
|
||||||
|
public static <T, E extends Exception> T rethrowAsRuntimeException(ThrowingSupplier<T, E> fn, String msg) {
|
||||||
|
try {
|
||||||
|
return fn.get();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException(msg, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,78 @@
|
||||||
|
package eu.dnetlib.dhp.actionmanager.common;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.junit.jupiter.api.Nested;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.junit.jupiter.api.io.TempDir;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
|
public class HdfsSupportTest {
|
||||||
|
|
||||||
|
@Nested
|
||||||
|
class Remove {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void shouldThrowARuntimeExceptionOnError() {
|
||||||
|
// when
|
||||||
|
assertThrows(RuntimeException.class, () ->
|
||||||
|
HdfsSupport.remove(null, new Configuration()));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void shouldRemoveADirFromHDFS(@TempDir Path tempDir) {
|
||||||
|
// when
|
||||||
|
HdfsSupport.remove(tempDir.toString(), new Configuration());
|
||||||
|
|
||||||
|
// then
|
||||||
|
assertFalse(Files.exists(tempDir));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) throws IOException {
|
||||||
|
// given
|
||||||
|
Path file = Files.createTempFile(tempDir, "p", "s");
|
||||||
|
|
||||||
|
// when
|
||||||
|
HdfsSupport.remove(file.toString(), new Configuration());
|
||||||
|
|
||||||
|
// then
|
||||||
|
assertFalse(Files.exists(file));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Nested
|
||||||
|
class ListFiles {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void shouldThrowARuntimeExceptionOnError() {
|
||||||
|
// when
|
||||||
|
assertThrows(RuntimeException.class, () ->
|
||||||
|
HdfsSupport.listFiles(null, new Configuration()));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException {
|
||||||
|
Path subDir1 = Files.createTempDirectory(tempDir, "list_me");
|
||||||
|
Path subDir2 = Files.createTempDirectory(tempDir, "list_me");
|
||||||
|
|
||||||
|
// when
|
||||||
|
List<String> paths = HdfsSupport.listFiles(tempDir.toString(), new Configuration());
|
||||||
|
|
||||||
|
// then
|
||||||
|
assertEquals(2, paths.size());
|
||||||
|
List<String> expecteds = Arrays.stream(new String[]{subDir1.toString(), subDir2.toString()})
|
||||||
|
.sorted().collect(Collectors.toList());
|
||||||
|
List<String> actuals = paths.stream().sorted().collect(Collectors.toList());
|
||||||
|
assertTrue(actuals.get(0).contains(expecteds.get(0)));
|
||||||
|
assertTrue(actuals.get(1).contains(expecteds.get(1)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,35 @@
|
||||||
|
package eu.dnetlib.dhp.actionmanager.common;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
import org.junit.jupiter.api.Nested;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
|
|
||||||
|
public class ModelSupportTest {
|
||||||
|
|
||||||
|
@Nested
|
||||||
|
class IsSubClass {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void shouldReturnFalseWhenSubClassDoesNotExtendSuperClass() {
|
||||||
|
// when
|
||||||
|
Boolean result = ModelSupport.isSubClass(Relation.class, OafEntity.class);
|
||||||
|
|
||||||
|
// then
|
||||||
|
assertFalse(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void shouldReturnTrueWhenSubClassExtendsSuperClass() {
|
||||||
|
// when
|
||||||
|
Boolean result = ModelSupport.isSubClass(Result.class, OafEntity.class);
|
||||||
|
|
||||||
|
// then
|
||||||
|
assertTrue(result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,54 @@
|
||||||
|
package eu.dnetlib.dhp.actionmanager.common;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.actionmanager.common.FunctionalInterfaceSupport.ThrowingConsumer;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.junit.jupiter.api.Nested;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.util.function.Function;
|
||||||
|
|
||||||
|
import static org.mockito.Mockito.*;
|
||||||
|
|
||||||
|
public class SparkSessionSupportTest {
|
||||||
|
|
||||||
|
@Nested
|
||||||
|
class RunWithSparkSession {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged() throws Exception {
|
||||||
|
// given
|
||||||
|
SparkSession spark = mock(SparkSession.class);
|
||||||
|
SparkConf conf = mock(SparkConf.class);
|
||||||
|
Function<SparkConf, SparkSession> sparkSessionBuilder = mock(Function.class);
|
||||||
|
when(sparkSessionBuilder.apply(conf)).thenReturn(spark);
|
||||||
|
ThrowingConsumer<SparkSession, Exception> fn = mock(ThrowingConsumer.class);
|
||||||
|
|
||||||
|
// when
|
||||||
|
SparkSessionSupport.runWithSparkSession(sparkSessionBuilder, conf, false, fn);
|
||||||
|
|
||||||
|
// then
|
||||||
|
verify(sparkSessionBuilder).apply(conf);
|
||||||
|
verify(fn).accept(spark);
|
||||||
|
verify(spark, never()).stop();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged() throws Exception {
|
||||||
|
// given
|
||||||
|
SparkSession spark = mock(SparkSession.class);
|
||||||
|
SparkConf conf = mock(SparkConf.class);
|
||||||
|
Function<SparkConf, SparkSession> sparkSessionBuilder = mock(Function.class);
|
||||||
|
when(sparkSessionBuilder.apply(conf)).thenReturn(spark);
|
||||||
|
ThrowingConsumer<SparkSession, Exception> fn = mock(ThrowingConsumer.class);
|
||||||
|
|
||||||
|
// when
|
||||||
|
SparkSessionSupport.runWithSparkSession(sparkSessionBuilder, conf, true, fn);
|
||||||
|
|
||||||
|
// then
|
||||||
|
verify(sparkSessionBuilder).apply(conf);
|
||||||
|
verify(fn).accept(spark);
|
||||||
|
verify(spark, times(1)).stop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue