From d74e128aa6c9cec44f81b6e97ecb83b4b15bf0bd Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 7 Apr 2020 11:56:22 +0200 Subject: [PATCH] Utility classes moved in dhp-common and dhp-schemas --- dhp-common/pom.xml | 6 - .../dnetlib/dhp/common/ModelSupportTest.java | 36 ------ .../dnetlib/dhp/schema/common/EntityType.java | 23 ++++ .../dhp/schema/common/MainEntityType.java | 9 ++ .../dhp/schema/common/ModelSupport.java | 104 +++++++++++++++++- .../dhp/schema}/common/ModelSupportTest.java | 2 +- dhp-workflows/dhp-actionmanager/pom.xml | 1 + .../common/FunctionalInterfaceSupport.java | 55 --------- .../dhp/actionmanager/common/HdfsSupport.java | 57 ---------- .../actionmanager/common/ModelSupport.java | 51 --------- .../common/SparkSessionSupport.java | 57 ---------- .../actionmanager/common/ThrowingSupport.java | 76 ------------- .../PartitionActionSetsByPayloadTypeJob.java | 4 +- .../actionmanager/promote/MergeAndGet.java | 4 +- .../PromoteActionPayloadForGraphTableJob.java | 38 +------ .../PromoteActionPayloadFunctions.java | 4 +- .../actionmanager/common/HdfsSupportTest.java | 78 ------------- .../common/SparkSessionSupportTest.java | 54 --------- ...rtitionActionSetsByPayloadTypeJobTest.java | 2 +- .../promote/MergeAndGetTest.java | 2 +- .../PromoteActionPayloadFunctionsTest.java | 2 +- .../dhp/oa/graph/GraphMappingUtils.java | 31 ------ .../dhp/oa/graph/SparkGraphImporterJob.java | 3 +- .../oa/provision/AdjacencyListBuilderJob.java | 5 +- .../CreateRelatedEntitiesJob_phase1.java | 4 +- .../CreateRelatedEntitiesJob_phase2.java | 4 +- .../dhp/oa/provision/PrepareRelationsJob.java | 1 + .../oa/provision/utils/GraphMappingUtils.java | 103 +---------------- .../oa/provision/utils/XmlRecordFactory.java | 16 ++- 29 files changed, 177 insertions(+), 655 deletions(-) delete mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/ModelSupportTest.java create mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/EntityType.java create mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/MainEntityType.java rename {dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager => dhp-schemas/src/test/java/eu/dnetlib/dhp/schema}/common/ModelSupportTest.java (95%) delete mode 100644 dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/common/FunctionalInterfaceSupport.java delete mode 100644 dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/common/HdfsSupport.java delete mode 100644 dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/common/ModelSupport.java delete mode 100644 dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/common/SparkSessionSupport.java delete mode 100644 dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/common/ThrowingSupport.java delete mode 100644 dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/common/HdfsSupportTest.java delete mode 100644 dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/common/SparkSessionSupportTest.java delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/GraphMappingUtils.java diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index d224ebc9f..7b073397f 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -14,12 +14,6 @@ - - eu.dnetlib.dhp - dhp-schemas - ${project.version} - - org.apache.hadoop hadoop-common diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/ModelSupportTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/ModelSupportTest.java deleted file mode 100644 index bfed019e9..000000000 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/ModelSupportTest.java +++ /dev/null @@ -1,36 +0,0 @@ -package eu.dnetlib.dhp.common; - -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.OafEntity; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.Result; -import org.junit.jupiter.api.Nested; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -public class ModelSupportTest { - - @Nested - class IsSubClass { - - @Test - public void shouldReturnFalseWhenSubClassDoesNotExtendSuperClass() { - // when - Boolean result = ModelSupport.isSubClass(Relation.class, OafEntity.class); - - // then - assertFalse(result); - } - - @Test - public void shouldReturnTrueWhenSubClassExtendsSuperClass() { - // when - Boolean result = ModelSupport.isSubClass(Result.class, OafEntity.class); - - // then - assertTrue(result); - } - } -} \ No newline at end of file diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/EntityType.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/EntityType.java new file mode 100644 index 000000000..9e4fa7d8a --- /dev/null +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/EntityType.java @@ -0,0 +1,23 @@ +package eu.dnetlib.dhp.schema.common; + +import eu.dnetlib.dhp.schema.oaf.OafEntity; + +/** + * Actual entity types in the Graph + */ +public enum EntityType { + + publication, dataset, otherresearchproduct, software, datasource, organization, project; + + /** + * Resolves the EntityType, given the relative class name + * @param clazz the given class name + * @param actual OafEntity subclass + * @return the EntityType associated to the given class + */ + public static EntityType fromClass(Class clazz) { + + return EntityType.valueOf(clazz.getSimpleName().toLowerCase()); + } + +} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/MainEntityType.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/MainEntityType.java new file mode 100644 index 000000000..7d65e39c0 --- /dev/null +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/MainEntityType.java @@ -0,0 +1,9 @@ +package eu.dnetlib.dhp.schema.common; + +/** + * Main entity types in the Graph + */ +public enum MainEntityType { + + result, datasource, organization, project +} \ No newline at end of file diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java index 3c774aa38..48f711a03 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java @@ -1,12 +1,60 @@ package eu.dnetlib.dhp.schema.common; -import eu.dnetlib.dhp.schema.oaf.Oaf; +import com.google.common.collect.Maps; +import eu.dnetlib.dhp.schema.oaf.*; + +import java.util.Map; /** - * Inheritance utility methods. + * Oaf model utility methods. */ public class ModelSupport { + /** + * Defines the mapping between the actual entity type and the main entity type + */ + private static Map entityMapping = Maps.newHashMap(); + + static { + entityMapping.put(EntityType.publication, MainEntityType.result); + entityMapping.put(EntityType.dataset, MainEntityType.result); + entityMapping.put(EntityType.otherresearchproduct, MainEntityType.result); + entityMapping.put(EntityType.software, MainEntityType.result); + entityMapping.put(EntityType.datasource, MainEntityType.datasource); + entityMapping.put(EntityType.organization, MainEntityType.organization); + entityMapping.put(EntityType.project, MainEntityType.project); + } + + /** + * Defines the mapping between the actual entity types and the relative classes implementing them + */ + public final static Map entityTypes = Maps.newHashMap(); + + static { + entityTypes.put(EntityType.datasource, Datasource.class); + entityTypes.put(EntityType.organization, Organization.class); + entityTypes.put(EntityType.project, Project.class); + entityTypes.put(EntityType.dataset, Dataset.class); + entityTypes.put(EntityType.otherresearchproduct, OtherResearchProduct.class); + entityTypes.put(EntityType.software, Software.class); + entityTypes.put(EntityType.publication, Publication.class); + } + + public final static Map oafTypes = Maps.newHashMap(); + + static { + oafTypes.put("datasource", Datasource.class); + oafTypes.put("organization", Organization.class); + oafTypes.put("project", Project.class); + oafTypes.put("dataset", Dataset.class); + oafTypes.put("otherresearchproduct", OtherResearchProduct.class); + oafTypes.put("software", Software.class); + oafTypes.put("publication", Publication.class); + oafTypes.put("relation", Relation.class); + } + + private static final String schemeTemplate = "dnet:%s_%s_relations"; + private ModelSupport() { } @@ -48,4 +96,56 @@ public class ModelSupport { public static Boolean isSubClass(Class subClazz, Class superClazz) { return superClazz.isAssignableFrom(subClazz); } + + /** + * Lists all the OAF model classes + * + * @param + * @return + */ + public static Class[] getOafModelClasses() { + return new Class[]{ + Author.class, + Context.class, + Country.class, + DataInfo.class, + Dataset.class, + Datasource.class, + ExternalReference.class, + ExtraInfo.class, + Field.class, + GeoLocation.class, + Instance.class, + Journal.class, + KeyValue.class, + Oaf.class, + OafEntity.class, + OAIProvenance.class, + Organization.class, + OriginDescription.class, + OtherResearchProduct.class, + Project.class, + Publication.class, + Qualifier.class, + Relation.class, + Result.class, + Software.class, + StructuredProperty.class + }; + } + + public static String getMainType(final EntityType type) { + return entityMapping.get(type).name(); + } + + public static boolean isResult(EntityType type) { + return MainEntityType.result.name().equals(getMainType(type)); + } + + public static String getScheme(final String sourceType, final String targetType) { + return String.format(schemeTemplate, + entityMapping.get(EntityType.valueOf(sourceType)).name(), + entityMapping.get(EntityType.valueOf(targetType)).name()); + } + } \ No newline at end of file diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/common/ModelSupportTest.java b/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/common/ModelSupportTest.java similarity index 95% rename from dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/common/ModelSupportTest.java rename to dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/common/ModelSupportTest.java index ea8082fe1..21583cd44 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/common/ModelSupportTest.java +++ b/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/common/ModelSupportTest.java @@ -1,4 +1,4 @@ -package eu.dnetlib.dhp.actionmanager.common; +package eu.dnetlib.dhp.schema.common; import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.Relation; diff --git a/dhp-workflows/dhp-actionmanager/pom.xml b/dhp-workflows/dhp-actionmanager/pom.xml index 6a8820f49..b034528ba 100644 --- a/dhp-workflows/dhp-actionmanager/pom.xml +++ b/dhp-workflows/dhp-actionmanager/pom.xml @@ -52,5 +52,6 @@ dhp-schemas ${project.version} + diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/common/FunctionalInterfaceSupport.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/common/FunctionalInterfaceSupport.java deleted file mode 100644 index 0f962428f..000000000 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/common/FunctionalInterfaceSupport.java +++ /dev/null @@ -1,55 +0,0 @@ -package eu.dnetlib.dhp.actionmanager.common; - -import java.io.Serializable; -import java.util.function.Supplier; - -/** - * Provides serializable and throwing extensions to standard functional interfaces. - */ -public class FunctionalInterfaceSupport { - - private FunctionalInterfaceSupport() { - } - - /** - * Serializable supplier of any kind of objects. To be used withing spark processing pipelines when supplying - * functions externally. - * - * @param - */ - @FunctionalInterface - public interface SerializableSupplier extends Supplier, Serializable { - } - - /** - * Extension of consumer accepting functions throwing an exception. - * - * @param - * @param - */ - @FunctionalInterface - public interface ThrowingConsumer { - void accept(T t) throws E; - } - - /** - * Extension of supplier accepting functions throwing an exception. - * - * @param - * @param - */ - @FunctionalInterface - public interface ThrowingSupplier { - T get() throws E; - } - - /** - * Extension of runnable accepting functions throwing an exception. - * - * @param - */ - @FunctionalInterface - public interface ThrowingRunnable { - void run() throws E; - } -} diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/common/HdfsSupport.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/common/HdfsSupport.java deleted file mode 100644 index 70af7e2a5..000000000 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/common/HdfsSupport.java +++ /dev/null @@ -1,57 +0,0 @@ -package eu.dnetlib.dhp.actionmanager.common; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.Arrays; -import java.util.List; -import java.util.stream.Collectors; - -import static eu.dnetlib.dhp.actionmanager.common.ThrowingSupport.rethrowAsRuntimeException; - -/** - * HDFS utility methods. - */ -public class HdfsSupport { - private static final Logger logger = LoggerFactory.getLogger(HdfsSupport.class); - - private HdfsSupport() { - } - - /** - * Removes a path (file or dir) from HDFS. - * - * @param path Path to be removed - * @param configuration Configuration of hadoop env - */ - public static void remove(String path, Configuration configuration) { - logger.info("Removing path: {}", path); - rethrowAsRuntimeException(() -> { - Path f = new Path(path); - FileSystem fileSystem = FileSystem.get(configuration); - if (fileSystem.exists(f)) { - fileSystem.delete(f, true); - } - }); - } - - /** - * Lists hadoop files located below path or alternatively lists subdirs under path. - * - * @param path Path to be listed for hadoop files - * @param configuration Configuration of hadoop env - * @return List with string locations of hadoop files - */ - public static List listFiles(String path, Configuration configuration) { - logger.info("Listing files in path: {}", path); - return rethrowAsRuntimeException(() -> Arrays - .stream(FileSystem.get(configuration).listStatus(new Path(path))) - .filter(FileStatus::isDirectory) - .map(x -> x.getPath().toString()) - .collect(Collectors.toList())); - } -} diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/common/ModelSupport.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/common/ModelSupport.java deleted file mode 100644 index 62cb8a542..000000000 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/common/ModelSupport.java +++ /dev/null @@ -1,51 +0,0 @@ -package eu.dnetlib.dhp.actionmanager.common; - -import eu.dnetlib.dhp.schema.oaf.Oaf; - -/** - * Inheritance utility methods. - */ -public class ModelSupport { - - private ModelSupport() { - } - - /** - * Checks subclass-superclass relationship. - * - * @param subClazzObject Subclass object instance - * @param superClazzObject Superclass object instance - * @param Subclass type - * @param Superclass type - * @return True if X is a subclass of Y - */ - public static Boolean isSubClass(X subClazzObject, Y superClazzObject) { - return isSubClass(subClazzObject.getClass(), superClazzObject.getClass()); - } - - /** - * Checks subclass-superclass relationship. - * - * @param subClazzObject Subclass object instance - * @param superClazz Superclass class - * @param Subclass type - * @param Superclass type - * @return True if X is a subclass of Y - */ - public static Boolean isSubClass(X subClazzObject, Class superClazz) { - return isSubClass(subClazzObject.getClass(), superClazz); - } - - /** - * Checks subclass-superclass relationship. - * - * @param subClazz Subclass class - * @param superClazz Superclass class - * @param Subclass type - * @param Superclass type - * @return True if X is a subclass of Y - */ - public static Boolean isSubClass(Class subClazz, Class superClazz) { - return superClazz.isAssignableFrom(subClazz); - } -} diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/common/SparkSessionSupport.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/common/SparkSessionSupport.java deleted file mode 100644 index 96b29fd22..000000000 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/common/SparkSessionSupport.java +++ /dev/null @@ -1,57 +0,0 @@ -package eu.dnetlib.dhp.actionmanager.common; - -import eu.dnetlib.dhp.actionmanager.common.FunctionalInterfaceSupport.ThrowingConsumer; -import org.apache.spark.SparkConf; -import org.apache.spark.sql.SparkSession; - -import java.util.Objects; -import java.util.function.Function; - -/** - * SparkSession utility methods. - */ -public class SparkSessionSupport { - - private SparkSessionSupport() { - } - - /** - * Runs a given function using SparkSession created using default builder and supplied SparkConf. Stops SparkSession - * when SparkSession is managed. Allows to reuse SparkSession created externally. - * - * @param conf SparkConf instance - * @param isSparkSessionManaged When true will stop SparkSession - * @param fn Consumer to be applied to constructed SparkSession - */ - public static void runWithSparkSession(SparkConf conf, - Boolean isSparkSessionManaged, - ThrowingConsumer fn) { - runWithSparkSession(c -> SparkSession.builder().config(c).getOrCreate(), conf, isSparkSessionManaged, fn); - } - - /** - * Runs a given function using SparkSession created using supplied builder and supplied SparkConf. Stops SparkSession - * when SparkSession is managed. Allows to reuse SparkSession created externally. - * - * @param sparkSessionBuilder Builder of SparkSession - * @param conf SparkConf instance - * @param isSparkSessionManaged When true will stop SparkSession - * @param fn Consumer to be applied to constructed SparkSession - */ - public static void runWithSparkSession(Function sparkSessionBuilder, - SparkConf conf, - Boolean isSparkSessionManaged, - ThrowingConsumer fn) { - SparkSession spark = null; - try { - spark = sparkSessionBuilder.apply(conf); - fn.accept(spark); - } catch (Exception e) { - throw new RuntimeException(e); - } finally { - if (Objects.nonNull(spark) && isSparkSessionManaged) { - spark.stop(); - } - } - } -} diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/common/ThrowingSupport.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/common/ThrowingSupport.java deleted file mode 100644 index a384728d0..000000000 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/common/ThrowingSupport.java +++ /dev/null @@ -1,76 +0,0 @@ -package eu.dnetlib.dhp.actionmanager.common; - -import eu.dnetlib.dhp.actionmanager.common.FunctionalInterfaceSupport.ThrowingRunnable; -import eu.dnetlib.dhp.actionmanager.common.FunctionalInterfaceSupport.ThrowingSupplier; - -/** - * Exception handling utility methods. - */ -public class ThrowingSupport { - - private ThrowingSupport() { - } - - /** - * Executes given runnable and rethrows any exceptions as RuntimeException. - * - * @param fn Runnable to be executed - * @param Type of exception thrown - */ - public static void rethrowAsRuntimeException(ThrowingRunnable fn) { - try { - fn.run(); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - /** - * Executes given runnable and rethrows any exceptions as RuntimeException with custom message. - * - * @param fn Runnable to be executed - * @param msg Message to be set for rethrown exception - * @param Type of exception thrown - */ - public static void rethrowAsRuntimeException(ThrowingRunnable fn, String msg) { - try { - fn.run(); - } catch (Exception e) { - throw new RuntimeException(msg, e); - } - } - - /** - * Executes given supplier and rethrows any exceptions as RuntimeException. - * - * @param fn Supplier to be executed - * @param Type of returned value - * @param Type of exception thrown - * @return Result of supplier execution - */ - public static T rethrowAsRuntimeException(ThrowingSupplier fn) { - try { - return fn.get(); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - /** - * Executes given supplier and rethrows any exceptions as RuntimeException with custom message. - * - * @param fn Supplier to be executed - * @param msg Message to be set for rethrown exception - * @param Type of returned value - * @param Type of exception thrown - * @return Result of supplier execution - */ - public static T rethrowAsRuntimeException(ThrowingSupplier fn, String msg) { - try { - return fn.get(); - } catch (Exception e) { - throw new RuntimeException(msg, e); - } - } - -} diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJob.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJob.java index dbae0560b..003f6dc6c 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJob.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJob.java @@ -1,6 +1,6 @@ package eu.dnetlib.dhp.actionmanager.partition; -import eu.dnetlib.dhp.actionmanager.common.HdfsSupport; +import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJob; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import org.apache.commons.io.IOUtils; @@ -18,7 +18,7 @@ import java.util.Collections; import java.util.List; import java.util.Optional; -import static eu.dnetlib.dhp.actionmanager.common.SparkSessionSupport.runWithSparkSession; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static org.apache.spark.sql.functions.*; /** diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java index 55346f612..19b2104bc 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java @@ -1,13 +1,13 @@ package eu.dnetlib.dhp.actionmanager.promote; -import eu.dnetlib.dhp.actionmanager.common.FunctionalInterfaceSupport.SerializableSupplier; +import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier; import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.Relation; import java.util.function.BiFunction; -import static eu.dnetlib.dhp.actionmanager.common.ModelSupport.isSubClass; +import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass; /** * OAF model merging support. diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java index fc7f6d04e..abad4f210 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java @@ -1,9 +1,10 @@ package eu.dnetlib.dhp.actionmanager.promote; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.common.FunctionalInterfaceSupport.SerializableSupplier; -import eu.dnetlib.dhp.actionmanager.common.HdfsSupport; +import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier; +import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; @@ -20,8 +21,8 @@ import java.util.Optional; import java.util.function.BiFunction; import java.util.function.Function; -import static eu.dnetlib.dhp.actionmanager.common.ModelSupport.isSubClass; -import static eu.dnetlib.dhp.actionmanager.common.SparkSessionSupport.runWithSparkSession; +import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; /** * Applies a given action payload file to graph table of compatible type. @@ -69,34 +70,7 @@ public class PromoteActionPayloadForGraphTableJob { SparkConf conf = new SparkConf(); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); - conf.registerKryoClasses(new Class[]{ - Author.class, - Context.class, - Country.class, - DataInfo.class, - eu.dnetlib.dhp.schema.oaf.Dataset.class, - Datasource.class, - ExternalReference.class, - ExtraInfo.class, - Field.class, - GeoLocation.class, - Instance.class, - Journal.class, - KeyValue.class, - Oaf.class, - OafEntity.class, - OAIProvenance.class, - Organization.class, - OriginDescription.class, - OtherResearchProduct.class, - Project.class, - Publication.class, - Qualifier.class, - Relation.class, - Result.class, - Software.class, - StructuredProperty.class - }); + conf.registerKryoClasses(ModelSupport.getOafModelClasses()); runWithSparkSession(conf, isSparkSessionManaged, spark -> { diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java index 5773b6654..fda86cb19 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java @@ -1,6 +1,6 @@ package eu.dnetlib.dhp.actionmanager.promote; -import eu.dnetlib.dhp.actionmanager.common.FunctionalInterfaceSupport.SerializableSupplier; +import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier; import eu.dnetlib.dhp.schema.oaf.Oaf; import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; @@ -16,7 +16,7 @@ import java.util.Optional; import java.util.function.BiFunction; import java.util.function.Function; -import static eu.dnetlib.dhp.actionmanager.common.ModelSupport.isSubClass; +import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass; /** * Promote action payload functions. diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/common/HdfsSupportTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/common/HdfsSupportTest.java deleted file mode 100644 index e352a9445..000000000 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/common/HdfsSupportTest.java +++ /dev/null @@ -1,78 +0,0 @@ -package eu.dnetlib.dhp.actionmanager.common; - -import org.apache.hadoop.conf.Configuration; -import org.junit.jupiter.api.Nested; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.Arrays; -import java.util.List; -import java.util.stream.Collectors; - -import static org.junit.jupiter.api.Assertions.*; - -public class HdfsSupportTest { - - @Nested - class Remove { - - @Test - public void shouldThrowARuntimeExceptionOnError() { - // when - assertThrows(RuntimeException.class, () -> - HdfsSupport.remove(null, new Configuration())); - } - - @Test - public void shouldRemoveADirFromHDFS(@TempDir Path tempDir) { - // when - HdfsSupport.remove(tempDir.toString(), new Configuration()); - - // then - assertFalse(Files.exists(tempDir)); - } - - @Test - public void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) throws IOException { - // given - Path file = Files.createTempFile(tempDir, "p", "s"); - - // when - HdfsSupport.remove(file.toString(), new Configuration()); - - // then - assertFalse(Files.exists(file)); - } - } - - @Nested - class ListFiles { - - @Test - public void shouldThrowARuntimeExceptionOnError() { - // when - assertThrows(RuntimeException.class, () -> - HdfsSupport.listFiles(null, new Configuration())); - } - - @Test - public void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException { - Path subDir1 = Files.createTempDirectory(tempDir, "list_me"); - Path subDir2 = Files.createTempDirectory(tempDir, "list_me"); - - // when - List paths = HdfsSupport.listFiles(tempDir.toString(), new Configuration()); - - // then - assertEquals(2, paths.size()); - List expecteds = Arrays.stream(new String[]{subDir1.toString(), subDir2.toString()}) - .sorted().collect(Collectors.toList()); - List actuals = paths.stream().sorted().collect(Collectors.toList()); - assertTrue(actuals.get(0).contains(expecteds.get(0))); - assertTrue(actuals.get(1).contains(expecteds.get(1))); - } - } -} diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/common/SparkSessionSupportTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/common/SparkSessionSupportTest.java deleted file mode 100644 index 93debe617..000000000 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/common/SparkSessionSupportTest.java +++ /dev/null @@ -1,54 +0,0 @@ -package eu.dnetlib.dhp.actionmanager.common; - -import eu.dnetlib.dhp.actionmanager.common.FunctionalInterfaceSupport.ThrowingConsumer; -import org.apache.spark.SparkConf; -import org.apache.spark.sql.SparkSession; -import org.junit.jupiter.api.Nested; -import org.junit.jupiter.api.Test; - -import java.util.function.Function; - -import static org.mockito.Mockito.*; - -public class SparkSessionSupportTest { - - @Nested - class RunWithSparkSession { - - @Test - public void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged() throws Exception { - // given - SparkSession spark = mock(SparkSession.class); - SparkConf conf = mock(SparkConf.class); - Function sparkSessionBuilder = mock(Function.class); - when(sparkSessionBuilder.apply(conf)).thenReturn(spark); - ThrowingConsumer fn = mock(ThrowingConsumer.class); - - // when - SparkSessionSupport.runWithSparkSession(sparkSessionBuilder, conf, false, fn); - - // then - verify(sparkSessionBuilder).apply(conf); - verify(fn).accept(spark); - verify(spark, never()).stop(); - } - - @Test - public void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged() throws Exception { - // given - SparkSession spark = mock(SparkSession.class); - SparkConf conf = mock(SparkConf.class); - Function sparkSessionBuilder = mock(Function.class); - when(sparkSessionBuilder.apply(conf)).thenReturn(spark); - ThrowingConsumer fn = mock(ThrowingConsumer.class); - - // when - SparkSessionSupport.runWithSparkSession(sparkSessionBuilder, conf, true, fn); - - // then - verify(sparkSessionBuilder).apply(conf); - verify(fn).accept(spark); - verify(spark, times(1)).stop(); - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java index 644bf19e5..2761a7c89 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java @@ -26,7 +26,7 @@ import java.nio.file.Paths; import java.util.*; import java.util.stream.Collectors; -import static eu.dnetlib.dhp.actionmanager.common.ThrowingSupport.rethrowAsRuntimeException; +import static eu.dnetlib.dhp.common.ThrowingSupport.rethrowAsRuntimeException; import static org.apache.spark.sql.functions.*; import static org.junit.jupiter.api.Assertions.assertIterableEquals; import static scala.collection.JavaConversions.mutableSeqAsJavaList; diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java index caacada49..154e0a331 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java @@ -1,6 +1,6 @@ package eu.dnetlib.dhp.actionmanager.promote; -import eu.dnetlib.dhp.actionmanager.common.FunctionalInterfaceSupport.SerializableSupplier; +import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier; import eu.dnetlib.dhp.schema.oaf.*; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java index 39574e274..e3fc7db48 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java @@ -1,6 +1,6 @@ package eu.dnetlib.dhp.actionmanager.promote; -import eu.dnetlib.dhp.actionmanager.common.FunctionalInterfaceSupport.SerializableSupplier; +import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier; import eu.dnetlib.dhp.schema.oaf.Oaf; import org.apache.spark.SparkConf; import org.apache.spark.sql.Dataset; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/GraphMappingUtils.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/GraphMappingUtils.java deleted file mode 100644 index 81fde7e29..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/GraphMappingUtils.java +++ /dev/null @@ -1,31 +0,0 @@ -package eu.dnetlib.dhp.oa.graph; - -import java.util.Map; - -import com.google.common.collect.Maps; - -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.Datasource; -import eu.dnetlib.dhp.schema.oaf.Organization; -import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; -import eu.dnetlib.dhp.schema.oaf.Project; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.Software; - -public class GraphMappingUtils { - - public final static Map types = Maps.newHashMap(); - - static { - types.put("datasource", Datasource.class); - types.put("organization", Organization.class); - types.put("project", Project.class); - types.put("dataset", Dataset.class); - types.put("otherresearchproduct", OtherResearchProduct.class); - types.put("software", Software.class); - types.put("publication", Publication.class); - types.put("relation", Relation.class); - } - -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/SparkGraphImporterJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/SparkGraphImporterJob.java index e78635a81..44b534028 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/SparkGraphImporterJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/SparkGraphImporterJob.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.oa.graph; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.ModelSupport; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; @@ -39,7 +40,7 @@ public class SparkGraphImporterJob { final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); // Read the input file and convert it into RDD of serializable object - GraphMappingUtils.types.forEach((name, clazz) -> spark.createDataset(sc.textFile(inputPath + "/" + name) + ModelSupport.oafTypes.forEach((name, clazz) -> spark.createDataset(sc.textFile(inputPath + "/" + name) .map(s -> new ObjectMapper().readValue(s, clazz)) .rdd(), Encoders.bean(clazz)) .write() diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/AdjacencyListBuilderJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/AdjacencyListBuilderJob.java index 2cc52fb62..8162927b8 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/AdjacencyListBuilderJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/AdjacencyListBuilderJob.java @@ -1,11 +1,11 @@ package eu.dnetlib.dhp.oa.provision; -import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.oa.provision.model.EntityRelEntity; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; import eu.dnetlib.dhp.oa.provision.model.Tuple2; +import eu.dnetlib.dhp.schema.common.ModelSupport; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -21,7 +21,6 @@ import java.util.List; import java.util.Optional; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.getKryoClasses; /** * Joins the graph nodes by resolving the links of distance = 1 to create an adjacency list of linked objects. @@ -82,7 +81,7 @@ public class AdjacencyListBuilderJob { SparkConf conf = new SparkConf(); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); - conf.registerKryoClasses(getKryoClasses()); + conf.registerKryoClasses(ModelSupport.getOafModelClasses()); runWithSparkSession(conf, isSparkSessionManaged, spark -> { diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java index 7d3555b6c..d0035fa78 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java @@ -6,6 +6,8 @@ import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.oa.provision.model.EntityRelEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.SortableRelation; +import eu.dnetlib.dhp.schema.common.EntityType; +import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.OafEntity; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; @@ -91,7 +93,7 @@ public class CreateRelatedEntitiesJob_phase1 { SparkConf conf = new SparkConf(); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); - conf.registerKryoClasses(getKryoClasses()); + conf.registerKryoClasses(ModelSupport.getOafModelClasses()); runWithSparkSession(conf, isSparkSessionManaged, spark -> { diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java index 2b5c627b6..0a9235cc9 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java @@ -6,6 +6,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.oa.provision.model.EntityRelEntity; import eu.dnetlib.dhp.oa.provision.model.TypedRow; +import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; @@ -26,7 +27,6 @@ import java.util.List; import java.util.Optional; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.getKryoClasses; /** * Joins the graph nodes by resolving the links of distance = 1 to create an adjacency list of linked objects. @@ -93,7 +93,7 @@ public class CreateRelatedEntitiesJob_phase2 { SparkConf conf = new SparkConf(); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); - conf.registerKryoClasses(getKryoClasses()); + conf.registerKryoClasses(ModelSupport.getOafModelClasses()); runWithSparkSession(conf, isSparkSessionManaged, spark -> { diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java index caddfaf8d..95d2263b5 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java @@ -120,6 +120,7 @@ public class PrepareRelationsJob { .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, SortableRelation.class), Encoders.bean(SortableRelation.class)); } + //TODO work in progress private static void prepareRelationsRDDFromPaths(SparkSession spark, String inputRelationsPath, String outputPath, int numPartitions) { JavaRDD rels = readPathRelationRDD(spark, inputRelationsPath) .repartition(numPartitions); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java index 398a272e2..42174ac94 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java @@ -1,12 +1,14 @@ package eu.dnetlib.dhp.oa.provision.utils; -import com.google.common.collect.Maps; import com.google.common.collect.Sets; import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; -import eu.dnetlib.dhp.oa.provision.model.SortableRelation; +import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.oaf.*; -import java.util.*; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; import java.util.stream.Collectors; import static org.apache.commons.lang3.StringUtils.substringAfter; @@ -15,101 +17,8 @@ public class GraphMappingUtils { public static final String SEPARATOR = "_"; - public final static Map entityTypes = Maps.newHashMap(); - - static { - entityTypes.put(EntityType.datasource, Datasource.class); - entityTypes.put(EntityType.organization, Organization.class); - entityTypes.put(EntityType.project, Project.class); - entityTypes.put(EntityType.dataset, Dataset.class); - entityTypes.put(EntityType.otherresearchproduct, OtherResearchProduct.class); - entityTypes.put(EntityType.software, Software.class); - entityTypes.put(EntityType.publication, Publication.class); - } - - public enum EntityType { - publication, dataset, otherresearchproduct, software, datasource, organization, project; - - public static EntityType fromClass(Class clazz) { - switch (clazz.getName()) { - case "eu.dnetlib.dhp.schema.oaf.Publication" : return publication; - case "eu.dnetlib.dhp.schema.oaf.Dataset" : return dataset; - case "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct" : return otherresearchproduct; - case "eu.dnetlib.dhp.schema.oaf.Software" : return software; - case "eu.dnetlib.dhp.schema.oaf.Datasource" : return datasource; - case "eu.dnetlib.dhp.schema.oaf.Organization" : return organization; - case "eu.dnetlib.dhp.schema.oaf.Project" : return project; - default: throw new IllegalArgumentException("Unknown OafEntity class: " + clazz.getName()); - } - } - } - - public enum MainEntityType { - result, datasource, organization, project - } - public static Set authorPidTypes = Sets.newHashSet("orcid", "magidentifier"); - private static final String schemeTemplate = "dnet:%s_%s_relations"; - - private static Map entityMapping = Maps.newHashMap(); - - static { - entityMapping.put(EntityType.publication, MainEntityType.result); - entityMapping.put(EntityType.dataset, MainEntityType.result); - entityMapping.put(EntityType.otherresearchproduct, MainEntityType.result); - entityMapping.put(EntityType.software, MainEntityType.result); - entityMapping.put(EntityType.datasource, MainEntityType.datasource); - entityMapping.put(EntityType.organization, MainEntityType.organization); - entityMapping.put(EntityType.project, MainEntityType.project); - } - - public static Class[] getKryoClasses() { - return new Class[]{ - Author.class, - Context.class, - Country.class, - DataInfo.class, - eu.dnetlib.dhp.schema.oaf.Dataset.class, - Datasource.class, - ExternalReference.class, - ExtraInfo.class, - Field.class, - GeoLocation.class, - Instance.class, - Journal.class, - KeyValue.class, - Oaf.class, - OafEntity.class, - OAIProvenance.class, - Organization.class, - OriginDescription.class, - OtherResearchProduct.class, - Project.class, - Publication.class, - Qualifier.class, - Relation.class, - SortableRelation.class, //SUPPORT - Result.class, - Software.class, - StructuredProperty.class - }; - } - - public static String getScheme(final String sourceType, final String targetType) { - return String.format(schemeTemplate, - entityMapping.get(EntityType.valueOf(sourceType)).name(), - entityMapping.get(EntityType.valueOf(targetType)).name()); - } - - public static String getMainType(final EntityType type) { - return entityMapping.get(type).name(); - } - - public static boolean isResult(EntityType type) { - return MainEntityType.result.name().equals(getMainType(type)); - } - public static RelatedEntity asRelatedEntity(E entity, Class clazz) { final RelatedEntity re = new RelatedEntity(); @@ -119,7 +28,7 @@ public class GraphMappingUtils { re.setPid(entity.getPid()); re.setCollectedfrom(entity.getCollectedfrom()); - switch (GraphMappingUtils.EntityType.fromClass(clazz)) { + switch (EntityType.fromClass(clazz)) { case publication: case dataset: case otherresearchproduct: diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 5cf881f00..aced9ac0a 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -9,10 +9,14 @@ import com.google.common.collect.Sets; import com.mycila.xmltool.XMLDoc; import com.mycila.xmltool.XMLTag; import eu.dnetlib.dhp.oa.provision.model.*; +import eu.dnetlib.dhp.schema.common.EntityType; +import eu.dnetlib.dhp.schema.common.MainEntityType; +import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.*; import org.apache.commons.lang3.StringUtils; import org.apache.spark.util.LongAccumulator; +import org.codehaus.janino.Mod; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; @@ -78,13 +82,13 @@ public class XmlRecordFactory implements Serializable { final OafEntity entity = toOafEntity(je.getEntity()); TemplateFactory templateFactory = new TemplateFactory(); try { - final EntityType type = GraphMappingUtils.EntityType.valueOf(je.getEntity().getType()); + final EntityType type = EntityType.valueOf(je.getEntity().getType()); final List metadata = metadata(type, entity, contexts); // rels has to be processed before the contexts because they enrich the contextMap with the funding info. final List relations = listRelations(je, templateFactory, contexts); - final String mainType = getMainType(type); + final String mainType = ModelSupport.getMainType(type); metadata.addAll(buildContexts(mainType, contexts)); metadata.add(XmlSerializationUtils.parseDataInfo(entity.getDataInfo())); @@ -106,7 +110,7 @@ public class XmlRecordFactory implements Serializable { private static OafEntity parseOaf(final String json, final String type) { try { - switch (GraphMappingUtils.EntityType.valueOf(type)) { + switch (EntityType.valueOf(type)) { case publication: return OBJECT_MAPPER.readValue(json, Publication.class); case dataset: @@ -168,7 +172,7 @@ public class XmlRecordFactory implements Serializable { .collect(Collectors.toList())); } - if (GraphMappingUtils.isResult(type)) { + if (ModelSupport.isResult(type)) { final Result r = (Result) entity; if (r.getContext() != null) { @@ -756,7 +760,7 @@ public class XmlRecordFactory implements Serializable { } final DataInfo info = rel.getDataInfo(); - final String scheme = getScheme(re.getType(), targetType); + final String scheme = ModelSupport.getScheme(re.getType(), targetType); if (StringUtils.isBlank(scheme)) { throw new IllegalArgumentException(String.format("missing scheme for: <%s - %s>", re.getType(), targetType)); @@ -782,7 +786,7 @@ public class XmlRecordFactory implements Serializable { final List children = Lists.newArrayList(); EntityType entityType = EntityType.valueOf(type); - if (MainEntityType.result.toString().equals(getMainType(entityType))) { + if (MainEntityType.result.toString().equals(ModelSupport.getMainType(entityType))) { final List instances = ((Result) entity).getInstance(); if (instances != null) { for (final Instance instance : ((Result) entity).getInstance()) {