From e0a777028ababa1cb48cb566d8192e4b410176c7 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 18 Feb 2020 17:23:34 +0100 Subject: [PATCH] fix problem in parameters --- .../dnetlib/dhp/graph/MappingUtilsTest.java | 66 ------------------- .../dhp/graph/XmlRecordFactoryTest.java | 55 ---------------- .../eu/dnetlib/dhp/PropagationConstant.java | 4 ++ .../{countrypropagation => }/TypedRow.java | 0 .../input_countrypropagation_parameters.json | 4 +- .../countrypropagation/oozie_app/workflow.xml | 2 +- ...sulaffiliationfrominstrepo_parameters.json | 26 ++++++++ .../oozie_app/config-default.xml | 18 +++++ .../oozie_app/workflow.xml | 60 +++++++++++++++++ 9 files changed, 111 insertions(+), 124 deletions(-) delete mode 100644 dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/graph/MappingUtilsTest.java delete mode 100644 dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/graph/XmlRecordFactoryTest.java create mode 100644 dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/PropagationConstant.java rename dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/{countrypropagation => }/TypedRow.java (100%) create mode 100644 dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json create mode 100644 dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/graph/MappingUtilsTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/graph/MappingUtilsTest.java deleted file mode 100644 index a9d696bea..000000000 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/graph/MappingUtilsTest.java +++ /dev/null @@ -1,66 +0,0 @@ -package eu.dnetlib.dhp.graph; - -import eu.dnetlib.dhp.graph.model.EntityRelEntity; -import eu.dnetlib.dhp.graph.model.RelatedEntity; -import eu.dnetlib.dhp.graph.utils.GraphMappingUtils; -import org.codehaus.jackson.map.ObjectMapper; -import org.junit.Before; -import org.junit.Test; - -import java.io.IOException; -import java.io.InputStreamReader; - -public class MappingUtilsTest { - - private GraphMappingUtils utils; - - @Before - public void setUp() { - utils = new GraphMappingUtils(); - } - - @Test - public void testOafMappingDatasource() throws IOException { - - final InputStreamReader in = new InputStreamReader(getClass().getResourceAsStream("datasource.json")); - final EntityRelEntity e = new ObjectMapper().readValue(in, EntityRelEntity.class); - e.getSource().setType("datasource"); - - final EntityRelEntity out = utils.asRelatedEntity(e); - System.out.println(out); - - } - - //@Test - public void testOafMappingResult() throws IOException { - - final InputStreamReader in = new InputStreamReader(getClass().getResourceAsStream("result.json")); - final EntityRelEntity e = new ObjectMapper().readValue(in, EntityRelEntity.class); - - final EntityRelEntity out = utils.asRelatedEntity(e); - System.out.println(out); - - } - - @Test - public void testOafMappingSoftware() throws IOException { - - final InputStreamReader in = new InputStreamReader(getClass().getResourceAsStream("software.json")); - final EntityRelEntity e = new ObjectMapper().readValue(in, EntityRelEntity.class); - - final EntityRelEntity out = utils.asRelatedEntity(e); - System.out.println(out); - - } - - - @Test - public void testParseRelatedEntity() throws IOException { - - final InputStreamReader in = new InputStreamReader(getClass().getResourceAsStream("related_entity.json")); - final RelatedEntity e = new ObjectMapper().readValue(in, RelatedEntity.class); - - System.out.println(e); - - } -} diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/graph/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/graph/XmlRecordFactoryTest.java deleted file mode 100644 index 2a3c343ec..000000000 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/graph/XmlRecordFactoryTest.java +++ /dev/null @@ -1,55 +0,0 @@ -package eu.dnetlib.dhp.graph; - -import eu.dnetlib.dhp.graph.utils.ContextMapper; -import org.apache.commons.io.FileUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.spark.sql.SparkSession; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -import java.io.File; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; - -public class XmlRecordFactoryTest { - - private static final Log log = LogFactory.getLog(XmlRecordFactoryTest.class); - - private Path testDir; - - @Before - public void setup() throws IOException { - testDir = Files.createTempDirectory(getClass().getSimpleName()); - log.info("created test directory " + testDir.toString()); - } - - @After - public void tearDown() throws IOException { - FileUtils.deleteDirectory(testDir.toFile()); - log.info("deleted test directory " + testDir.toString()); - } - - @Test - public void testXmlSerialization() throws Exception { - - final SparkSession spark = SparkSession - .builder() - .appName(SparkXmlRecordBuilderJob.class.getSimpleName()) - .master("local[*]") - .getOrCreate(); - - final String inputDir = testDir.toString() + "/3_joined_entities"; - FileUtils.forceMkdir(new File(inputDir)); - FileUtils.copyFile(new File("/Users/claudio/Downloads/joined_entities-part-00000"), new File(inputDir + "/joined_entities-part-00000")); - - final ContextMapper ctx = ContextMapper.fromIS("https://dev-openaire.d4science.org:443/is/services/isLookUp"); - - final GraphJoiner g = new GraphJoiner(spark, ctx, inputDir, testDir.toString()); - - g.asXML(); - } - -} diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/PropagationConstant.java new file mode 100644 index 000000000..eed336d5b --- /dev/null +++ b/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -0,0 +1,4 @@ +package eu.dnetlib.dhp; + +public class PropagationConstant { +} diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/countrypropagation/TypedRow.java b/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/TypedRow.java similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/countrypropagation/TypedRow.java rename to dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/TypedRow.java diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json index cbafdcd46..090f3f152 100644 --- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json +++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json @@ -3,13 +3,13 @@ "paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", - "paramRequired": true, + "paramRequired": true }, { "paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the path of the sequencial file to read", - "paramRequired": true, + "paramRequired": true }, { "paramName":"wl", diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml index af88c6fbb..f24417bc0 100644 --- a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml @@ -49,7 +49,7 @@ -mt yarn-cluster --sourcePath${sourcePath} - --withelist${whitelist} + --whitelist${whitelist} --allowedtypes${allowedtypes} diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json new file mode 100644 index 000000000..090f3f152 --- /dev/null +++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json @@ -0,0 +1,26 @@ +[ + { + "paramName":"mt", + "paramLongName":"master", + "paramDescription": "should be local or yarn", + "paramRequired": true + }, + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName":"wl", + "paramLongName":"whitelist", + "paramDescription": "datasource id that will be considered even if not in the allowed typology list. Split by ;", + "paramRequired": true + }, + { + "paramName":"at", + "paramLongName":"allowedtypes", + "paramDescription": "the types of the allowed datasources. Split by ;", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml new file mode 100644 index 000000000..2e0ed9aee --- /dev/null +++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml @@ -0,0 +1,18 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml new file mode 100644 index 000000000..f24417bc0 --- /dev/null +++ b/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml @@ -0,0 +1,60 @@ + + + + sourcePath + the source path + + + whitelist + the white list + + + allowedtypes + the allowed types + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + ${jobTracker} + ${nameNode} + yarn-cluster + cluster + CountryPropagation + eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob + dhp-propagation-${projectVersion}.jar + --executor-memory ${sparkExecutorMemory} + --executor-cores ${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" + --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" + + -mt yarn-cluster + --sourcePath${sourcePath} + --whitelist${whitelist} + --allowedtypes${allowedtypes} + + + + + + + \ No newline at end of file