Use workingDir parameter for temporary data of ORCID enrichment

2024-10-21 18:05:01 +02:00 · 2024-10-21 18:05:01 +02:00 · aa7b8fd014
parent 0e34b0ece1
commit aa7b8fd014
2 changed files with 13 additions and 5 deletions
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/enrich_graph_orcid_parameters.json
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/enrich_graph_orcid_parameters.json
@ -22,5 +22,11 @@
    "paramLongName": "targetPath",
    "paramDescription": "the output path of the graph enriched",
    "paramRequired": true
+  },
+  {
+    "paramName": "wp",
+    "paramLongName": "workingDir",
+    "paramDescription": "the working dir",
+    "paramRequired": true
  }
 ]
--- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala
@ -47,13 +47,15 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String]
    log.info(s"orcidPath is '$orcidPath'")
    val targetPath = parser.get("targetPath")
    log.info(s"targetPath is '$targetPath'")
+    val workingDir = parser.get("workingDir")
+    log.info(s"targetPath is '$workingDir'")

-    createTemporaryData(graphPath, orcidPath, targetPath)
-    analisys(targetPath)
-    generateGraph(graphPath, targetPath)
+    createTemporaryData(graphPath, orcidPath, workingDir)
+    analisys(workingDir)
+    generateGraph(graphPath, workingDir, targetPath)
  }

-  private def generateGraph(graphPath: String, targetPath: String): Unit = {
+  private def generateGraph(graphPath: String, workingDir: String, targetPath: String): Unit = {

    ModelSupport.entityTypes.asScala
      .filter(e => ModelSupport.isResult(e._1))
@ -63,7 +65,7 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String]

        val matched = spark.read
          .schema(Encoders.bean(classOf[ORCIDAuthorEnricherResult]).schema)
-          .parquet(s"${targetPath}/${resultType}_matched")
+          .parquet(s"${workingDir}/${resultType}_matched")
          .selectExpr("id", "enriched_author")

        spark.read