1
0
Fork 0

Use workingDir parameter for temporary data of ORCID enrichment

This commit is contained in:
Giambattista Bloisi 2024-10-21 18:05:01 +02:00
parent 0e34b0ece1
commit aa7b8fd014
2 changed files with 13 additions and 5 deletions

View File

@@ -22,5 +22,11 @@
"paramLongName": "targetPath", "paramLongName": "targetPath",
"paramDescription": "the output path of the graph enriched", "paramDescription": "the output path of the graph enriched",
"paramRequired": true "paramRequired": true
},
{
"paramName": "wp",
"paramLongName": "workingDir",
"paramDescription": "the working dir",
"paramRequired": true
} }
] ]

View File

@@ -47,13 +47,15 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String]
log.info(s"orcidPath is '$orcidPath'") log.info(s"orcidPath is '$orcidPath'")
val targetPath = parser.get("targetPath") val targetPath = parser.get("targetPath")
log.info(s"targetPath is '$targetPath'") log.info(s"targetPath is '$targetPath'")
val workingDir = parser.get("workingDir")
log.info(s"workingDir is '$workingDir'")
createTemporaryData(graphPath, orcidPath, targetPath) createTemporaryData(graphPath, orcidPath, workingDir)
analisys(targetPath) analisys(workingDir)
generateGraph(graphPath, targetPath) generateGraph(graphPath, workingDir, targetPath)
} }
private def generateGraph(graphPath: String, targetPath: String): Unit = { private def generateGraph(graphPath: String, workingDir: String, targetPath: String): Unit = {
ModelSupport.entityTypes.asScala ModelSupport.entityTypes.asScala
.filter(e => ModelSupport.isResult(e._1)) .filter(e => ModelSupport.isResult(e._1))
@@ -63,7 +65,7 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String]
val matched = spark.read val matched = spark.read
.schema(Encoders.bean(classOf[ORCIDAuthorEnricherResult]).schema) .schema(Encoders.bean(classOf[ORCIDAuthorEnricherResult]).schema)
.parquet(s"${targetPath}/${resultType}_matched") .parquet(s"${workingDir}/${resultType}_matched")
.selectExpr("id", "enriched_author") .selectExpr("id", "enriched_author")
spark.read spark.read