diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala
index 60df58e45..afd195ed0 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala
@@ -2,9 +2,11 @@ package eu.dnetlib.dhp.oa.graph.resolution
import com.fasterxml.jackson.databind.ObjectMapper
import eu.dnetlib.dhp.application.ArgumentApplicationParser
+import eu.dnetlib.dhp.common.HdfsSupport
import eu.dnetlib.dhp.schema.common.EntityType
import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Result, Software, Dataset => OafDataset}
import org.apache.commons.io.IOUtils
+import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.SparkConf
import org.apache.spark.sql._
import org.slf4j.{Logger, LoggerFactory}
@@ -34,13 +36,22 @@ object SparkResolveEntities {
val unresolvedPath = parser.get("unresolvedPath")
log.info(s"unresolvedPath -> $unresolvedPath")
+ val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration)
+ fs.mkdirs(new Path(workingPath))
resolveEntities(spark, workingPath, unresolvedPath)
-
-
generateResolvedEntities(spark, workingPath, graphBasePath)
- }
+ // TO BE conservative we keep the original entities in the working dir
+ // and save the resolved entities on the graphBasePath
+ //In future these lines of code should be removed
+ entities.foreach {
+ e =>
+ fs.rename(new Path(s"$graphBasePath/$e"), new Path(s"$workingPath/${e}_old"))
+ fs.rename(new Path(s"$workingPath/resolvedGraph/$e"), new Path(s"$graphBasePath/$e"))
+ }
+
+}
def resolveEntities(spark: SparkSession, workingPath: String, unresolvedPath: String) = {
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml
index 52f0e6e9d..4773fc87c 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml
@@ -4,6 +4,10 @@
graphBasePath
the path of the graph
+
+ unresolvedPath
+ the path of the unresolved Entities
+
@@ -36,5 +40,33 @@
+
+
+
+ yarn
+ cluster
+ Resolve Relations in raw graph
+ eu.dnetlib.dhp.oa.graph.resolution.SparkResolveEntities
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.shuffle.partitions=10000
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+
+ --masteryarn
+ --graphBasePath${graphBasePath}
+ --unresolvedPath${unresolvedPath}
+ --workingPath${workingDir}
+
+
+
+
+
+
\ No newline at end of file