From 62cc257e5c2b18574e78e51ea7c13be3c7031714 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 27 Mar 2020 17:07:34 +0100 Subject: [PATCH] fixed step1 workflow --- dhp-workflows/dhp-dedup-scholexplorer/pom.xml | 31 ++++++++++ dhp-workflows/dhp-graph-mapper/README.md | 3 + .../dhp/sx/graph/step1/oozie_app/workflow.xml | 59 ++++++++++++++++--- 3 files changed, 85 insertions(+), 8 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/README.md diff --git a/dhp-workflows/dhp-dedup-scholexplorer/pom.xml b/dhp-workflows/dhp-dedup-scholexplorer/pom.xml index f1b51a709..387952e33 100644 --- a/dhp-workflows/dhp-dedup-scholexplorer/pom.xml +++ b/dhp-workflows/dhp-dedup-scholexplorer/pom.xml @@ -10,6 +10,37 @@ dhp-dedup-scholexplorer + + + + net.alchim31.maven + scala-maven-plugin + 4.0.1 + + + scala-compile-first + initialize + + add-source + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + + ${scala.version} + + + + + + diff --git a/dhp-workflows/dhp-graph-mapper/README.md b/dhp-workflows/dhp-graph-mapper/README.md new file mode 100644 index 000000000..8105197b4 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/README.md @@ -0,0 +1,3 @@ +# dnet-graph-mapper +Dnet-graph-mapper is a DNET module responsible +for importing the first version of the graph into the Hadoop Cluster. 
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/workflow.xml index 4da737c33..ce00eff7b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/workflow.xml @@ -1,11 +1,20 @@ + + reuseContent + false + should import content from the aggregator or reuse a previous version + workingPath the working dir base path - targetPath + targetXMLPath + the graph Raw base path + + + targetEntityPath the graph Raw base path @@ -29,12 +38,20 @@ mongo database - user - HDFS user + entity + the entity type - + + + + + ${wf:conf('reuseContent') eq false} + ${wf:conf('reuseContent') eq true} + + + @@ -43,8 +60,8 @@ - - + + @@ -56,9 +73,8 @@ ${jobTracker} ${nameNode} eu.dnetlib.dhp.sx.graph.ImportDataFromMongo - -t${targetPath} + -t${targetXMLPath} -n${nameNode} - -u${user} -h${dbhost} -p27017 -dn${dbName} @@ -66,6 +82,33 @@ -l${layout} -i${interpretation} + + + + + + + + + + + + + + + ${jobTracker} + ${nameNode} + yarn-cluster + cluster + Import ${entity} and related entities + eu.dnetlib.dhp.sx.graph.SparkScholexplorerGraphImporter + dhp-graph-mapper-${projectVersion}.jar + --executor-memory ${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} ${sparkExtraOPT} + -mt yarn-cluster + --sourcePath${targetXMLPath} + --targetPath${targetEntityPath} + --entity${entity} +