[dhp-actionmanager] common package added with utility classes supporting hadoop and spark envs
This commit is contained in:
parent
ad70c23b2e
commit
f9f7350bb9
|
@ -0,0 +1,55 @@
|
|||
package eu.dnetlib.dhp.actionmanager.common;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.function.Supplier;
|
||||
|
||||
/**
|
||||
* Provides serializable and throwing extensions to standard functional interfaces.
|
||||
*/
|
||||
public class FunctionalInterfaceSupport {
|
||||
|
||||
private FunctionalInterfaceSupport() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Serializable supplier of any kind of objects. To be used withing spark processing pipelines when supplying
|
||||
* functions externally.
|
||||
*
|
||||
* @param <T>
|
||||
*/
|
||||
@FunctionalInterface
|
||||
public interface SerializableSupplier<T> extends Supplier<T>, Serializable {
|
||||
}
|
||||
|
||||
/**
|
||||
* Extension of consumer accepting functions throwing an exception.
|
||||
*
|
||||
* @param <T>
|
||||
* @param <E>
|
||||
*/
|
||||
@FunctionalInterface
|
||||
public interface ThrowingConsumer<T, E extends Exception> {
|
||||
void accept(T t) throws E;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extension of supplier accepting functions throwing an exception.
|
||||
*
|
||||
* @param <T>
|
||||
* @param <E>
|
||||
*/
|
||||
@FunctionalInterface
|
||||
public interface ThrowingSupplier<T, E extends Exception> {
|
||||
T get() throws E;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extension of runnable accepting functions throwing an exception.
|
||||
*
|
||||
* @param <E>
|
||||
*/
|
||||
@FunctionalInterface
|
||||
public interface ThrowingRunnable<E extends Exception> {
|
||||
void run() throws E;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
package eu.dnetlib.dhp.actionmanager.common;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static eu.dnetlib.dhp.actionmanager.common.ThrowingSupport.rethrowAsRuntimeException;
|
||||
|
||||
/**
|
||||
* HDFS utility methods.
|
||||
*/
|
||||
public class HdfsSupport {
|
||||
private static final Logger logger = LoggerFactory.getLogger(HdfsSupport.class);
|
||||
|
||||
private HdfsSupport() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes a path (file or dir) from HDFS.
|
||||
*
|
||||
* @param path Path to be removed
|
||||
* @param configuration Configuration of hadoop env
|
||||
*/
|
||||
public static void remove(String path, Configuration configuration) {
|
||||
logger.info("Removing path: {}", path);
|
||||
rethrowAsRuntimeException(() -> {
|
||||
Path f = new Path(path);
|
||||
FileSystem fileSystem = FileSystem.get(configuration);
|
||||
if (fileSystem.exists(f)) {
|
||||
fileSystem.delete(f, true);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Lists hadoop files located below path or alternatively lists subdirs under path.
|
||||
*
|
||||
* @param path Path to be listed for hadoop files
|
||||
* @param configuration Configuration of hadoop env
|
||||
* @return List with string locations of hadoop files
|
||||
*/
|
||||
public static List<String> listFiles(String path, Configuration configuration) {
|
||||
logger.info("Listing files in path: {}", path);
|
||||
return rethrowAsRuntimeException(() -> Arrays
|
||||
.stream(FileSystem.get(configuration).listStatus(new Path(path)))
|
||||
.filter(FileStatus::isDirectory)
|
||||
.map(x -> x.getPath().toString())
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
package eu.dnetlib.dhp.actionmanager.common;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||
|
||||
/**
|
||||
* Inheritance utility methods.
|
||||
*/
|
||||
public class ModelSupport {
|
||||
|
||||
private ModelSupport() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks subclass-superclass relationship.
|
||||
*
|
||||
* @param subClazzObject Subclass object instance
|
||||
* @param superClazzObject Superclass object instance
|
||||
* @param <X> Subclass type
|
||||
* @param <Y> Superclass type
|
||||
* @return True if X is a subclass of Y
|
||||
*/
|
||||
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(X subClazzObject, Y superClazzObject) {
|
||||
return isSubClass(subClazzObject.getClass(), superClazzObject.getClass());
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks subclass-superclass relationship.
|
||||
*
|
||||
* @param subClazzObject Subclass object instance
|
||||
* @param superClazz Superclass class
|
||||
* @param <X> Subclass type
|
||||
* @param <Y> Superclass type
|
||||
* @return True if X is a subclass of Y
|
||||
*/
|
||||
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(X subClazzObject, Class<Y> superClazz) {
|
||||
return isSubClass(subClazzObject.getClass(), superClazz);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks subclass-superclass relationship.
|
||||
*
|
||||
* @param subClazz Subclass class
|
||||
* @param superClazz Superclass class
|
||||
* @param <X> Subclass type
|
||||
* @param <Y> Superclass type
|
||||
* @return True if X is a subclass of Y
|
||||
*/
|
||||
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(Class<X> subClazz, Class<Y> superClazz) {
|
||||
return superClazz.isAssignableFrom(subClazz);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
package eu.dnetlib.dhp.actionmanager.common;
|
||||
|
||||
import eu.dnetlib.dhp.actionmanager.common.FunctionalInterfaceSupport.ThrowingConsumer;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
|
||||
import java.util.Objects;
|
||||
import java.util.function.Function;
|
||||
|
||||
/**
|
||||
* SparkSession utility methods.
|
||||
*/
|
||||
public class SparkSessionSupport {
|
||||
|
||||
private SparkSessionSupport() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs a given function using SparkSession created using default builder and supplied SparkConf. Stops SparkSession
|
||||
* when SparkSession is managed. Allows to reuse SparkSession created externally.
|
||||
*
|
||||
* @param conf SparkConf instance
|
||||
* @param isSparkSessionManaged When true will stop SparkSession
|
||||
* @param fn Consumer to be applied to constructed SparkSession
|
||||
*/
|
||||
public static void runWithSparkSession(SparkConf conf,
|
||||
Boolean isSparkSessionManaged,
|
||||
ThrowingConsumer<SparkSession, Exception> fn) {
|
||||
runWithSparkSession(c -> SparkSession.builder().config(c).getOrCreate(), conf, isSparkSessionManaged, fn);
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs a given function using SparkSession created using supplied builder and supplied SparkConf. Stops SparkSession
|
||||
* when SparkSession is managed. Allows to reuse SparkSession created externally.
|
||||
*
|
||||
* @param sparkSessionBuilder Builder of SparkSession
|
||||
* @param conf SparkConf instance
|
||||
* @param isSparkSessionManaged When true will stop SparkSession
|
||||
* @param fn Consumer to be applied to constructed SparkSession
|
||||
*/
|
||||
public static void runWithSparkSession(Function<SparkConf, SparkSession> sparkSessionBuilder,
|
||||
SparkConf conf,
|
||||
Boolean isSparkSessionManaged,
|
||||
ThrowingConsumer<SparkSession, Exception> fn) {
|
||||
SparkSession spark = null;
|
||||
try {
|
||||
spark = sparkSessionBuilder.apply(conf);
|
||||
fn.accept(spark);
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
} finally {
|
||||
if (Objects.nonNull(spark) && isSparkSessionManaged) {
|
||||
spark.stop();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,76 @@
|
|||
package eu.dnetlib.dhp.actionmanager.common;
|
||||
|
||||
import eu.dnetlib.dhp.actionmanager.common.FunctionalInterfaceSupport.ThrowingRunnable;
|
||||
import eu.dnetlib.dhp.actionmanager.common.FunctionalInterfaceSupport.ThrowingSupplier;
|
||||
|
||||
/**
|
||||
* Exception handling utility methods.
|
||||
*/
|
||||
public class ThrowingSupport {
|
||||
|
||||
private ThrowingSupport() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Executes given runnable and rethrows any exceptions as RuntimeException.
|
||||
*
|
||||
* @param fn Runnable to be executed
|
||||
* @param <E> Type of exception thrown
|
||||
*/
|
||||
public static <E extends Exception> void rethrowAsRuntimeException(ThrowingRunnable<E> fn) {
|
||||
try {
|
||||
fn.run();
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Executes given runnable and rethrows any exceptions as RuntimeException with custom message.
|
||||
*
|
||||
* @param fn Runnable to be executed
|
||||
* @param msg Message to be set for rethrown exception
|
||||
* @param <E> Type of exception thrown
|
||||
*/
|
||||
public static <E extends Exception> void rethrowAsRuntimeException(ThrowingRunnable<E> fn, String msg) {
|
||||
try {
|
||||
fn.run();
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(msg, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Executes given supplier and rethrows any exceptions as RuntimeException.
|
||||
*
|
||||
* @param fn Supplier to be executed
|
||||
* @param <T> Type of returned value
|
||||
* @param <E> Type of exception thrown
|
||||
* @return Result of supplier execution
|
||||
*/
|
||||
public static <T, E extends Exception> T rethrowAsRuntimeException(ThrowingSupplier<T, E> fn) {
|
||||
try {
|
||||
return fn.get();
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Executes given supplier and rethrows any exceptions as RuntimeException with custom message.
|
||||
*
|
||||
* @param fn Supplier to be executed
|
||||
* @param msg Message to be set for rethrown exception
|
||||
* @param <T> Type of returned value
|
||||
* @param <E> Type of exception thrown
|
||||
* @return Result of supplier execution
|
||||
*/
|
||||
public static <T, E extends Exception> T rethrowAsRuntimeException(ThrowingSupplier<T, E> fn, String msg) {
|
||||
try {
|
||||
return fn.get();
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(msg, e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,78 @@
|
|||
package eu.dnetlib.dhp.actionmanager.common;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.junit.jupiter.api.Nested;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
public class HdfsSupportTest {
|
||||
|
||||
@Nested
|
||||
class Remove {
|
||||
|
||||
@Test
|
||||
public void shouldThrowARuntimeExceptionOnError() {
|
||||
// when
|
||||
assertThrows(RuntimeException.class, () ->
|
||||
HdfsSupport.remove(null, new Configuration()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void shouldRemoveADirFromHDFS(@TempDir Path tempDir) {
|
||||
// when
|
||||
HdfsSupport.remove(tempDir.toString(), new Configuration());
|
||||
|
||||
// then
|
||||
assertFalse(Files.exists(tempDir));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) throws IOException {
|
||||
// given
|
||||
Path file = Files.createTempFile(tempDir, "p", "s");
|
||||
|
||||
// when
|
||||
HdfsSupport.remove(file.toString(), new Configuration());
|
||||
|
||||
// then
|
||||
assertFalse(Files.exists(file));
|
||||
}
|
||||
}
|
||||
|
||||
@Nested
|
||||
class ListFiles {
|
||||
|
||||
@Test
|
||||
public void shouldThrowARuntimeExceptionOnError() {
|
||||
// when
|
||||
assertThrows(RuntimeException.class, () ->
|
||||
HdfsSupport.listFiles(null, new Configuration()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException {
|
||||
Path subDir1 = Files.createTempDirectory(tempDir, "list_me");
|
||||
Path subDir2 = Files.createTempDirectory(tempDir, "list_me");
|
||||
|
||||
// when
|
||||
List<String> paths = HdfsSupport.listFiles(tempDir.toString(), new Configuration());
|
||||
|
||||
// then
|
||||
assertEquals(2, paths.size());
|
||||
List<String> expecteds = Arrays.stream(new String[]{subDir1.toString(), subDir2.toString()})
|
||||
.sorted().collect(Collectors.toList());
|
||||
List<String> actuals = paths.stream().sorted().collect(Collectors.toList());
|
||||
assertTrue(actuals.get(0).contains(expecteds.get(0)));
|
||||
assertTrue(actuals.get(1).contains(expecteds.get(1)));
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
package eu.dnetlib.dhp.actionmanager.common;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import org.junit.jupiter.api.Nested;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
public class ModelSupportTest {
|
||||
|
||||
@Nested
|
||||
class IsSubClass {
|
||||
|
||||
@Test
|
||||
public void shouldReturnFalseWhenSubClassDoesNotExtendSuperClass() {
|
||||
// when
|
||||
Boolean result = ModelSupport.isSubClass(Relation.class, OafEntity.class);
|
||||
|
||||
// then
|
||||
assertFalse(result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void shouldReturnTrueWhenSubClassExtendsSuperClass() {
|
||||
// when
|
||||
Boolean result = ModelSupport.isSubClass(Result.class, OafEntity.class);
|
||||
|
||||
// then
|
||||
assertTrue(result);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
package eu.dnetlib.dhp.actionmanager.common;
|
||||
|
||||
import eu.dnetlib.dhp.actionmanager.common.FunctionalInterfaceSupport.ThrowingConsumer;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.junit.jupiter.api.Nested;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.function.Function;
|
||||
|
||||
import static org.mockito.Mockito.*;
|
||||
|
||||
public class SparkSessionSupportTest {
|
||||
|
||||
@Nested
|
||||
class RunWithSparkSession {
|
||||
|
||||
@Test
|
||||
public void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged() throws Exception {
|
||||
// given
|
||||
SparkSession spark = mock(SparkSession.class);
|
||||
SparkConf conf = mock(SparkConf.class);
|
||||
Function<SparkConf, SparkSession> sparkSessionBuilder = mock(Function.class);
|
||||
when(sparkSessionBuilder.apply(conf)).thenReturn(spark);
|
||||
ThrowingConsumer<SparkSession, Exception> fn = mock(ThrowingConsumer.class);
|
||||
|
||||
// when
|
||||
SparkSessionSupport.runWithSparkSession(sparkSessionBuilder, conf, false, fn);
|
||||
|
||||
// then
|
||||
verify(sparkSessionBuilder).apply(conf);
|
||||
verify(fn).accept(spark);
|
||||
verify(spark, never()).stop();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged() throws Exception {
|
||||
// given
|
||||
SparkSession spark = mock(SparkSession.class);
|
||||
SparkConf conf = mock(SparkConf.class);
|
||||
Function<SparkConf, SparkSession> sparkSessionBuilder = mock(Function.class);
|
||||
when(sparkSessionBuilder.apply(conf)).thenReturn(spark);
|
||||
ThrowingConsumer<SparkSession, Exception> fn = mock(ThrowingConsumer.class);
|
||||
|
||||
// when
|
||||
SparkSessionSupport.runWithSparkSession(sparkSessionBuilder, conf, true, fn);
|
||||
|
||||
// then
|
||||
verify(sparkSessionBuilder).apply(conf);
|
||||
verify(fn).accept(spark);
|
||||
verify(spark, times(1)).stop();
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue