forked from D-Net/dnet-hadoop
Use workingDir parameter for temporary data of ORCID enrichment
This commit is contained in:
parent
0e34b0ece1
commit
aa7b8fd014
|
@ -22,5 +22,11 @@
|
||||||
"paramLongName": "targetPath",
|
"paramLongName": "targetPath",
|
||||||
"paramDescription": "the output path of the graph enriched",
|
"paramDescription": "the output path of the graph enriched",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "wp",
|
||||||
|
"paramLongName": "workingDir",
|
||||||
|
"paramDescription": "the working dir",
|
||||||
|
"paramRequired": true
|
||||||
}
|
}
|
||||||
]
|
]
|
|
@ -47,13 +47,15 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String]
|
||||||
log.info(s"orcidPath is '$orcidPath'")
|
log.info(s"orcidPath is '$orcidPath'")
|
||||||
val targetPath = parser.get("targetPath")
|
val targetPath = parser.get("targetPath")
|
||||||
log.info(s"targetPath is '$targetPath'")
|
log.info(s"targetPath is '$targetPath'")
|
||||||
|
val workingDir = parser.get("workingDir")
|
||||||
|
log.info(s"workingDir is '$workingDir'")
|
||||||
|
|
||||||
createTemporaryData(graphPath, orcidPath, targetPath)
|
createTemporaryData(graphPath, orcidPath, workingDir)
|
||||||
analisys(targetPath)
|
analisys(workingDir)
|
||||||
generateGraph(graphPath, targetPath)
|
generateGraph(graphPath, workingDir, targetPath)
|
||||||
}
|
}
|
||||||
|
|
||||||
private def generateGraph(graphPath: String, targetPath: String): Unit = {
|
private def generateGraph(graphPath: String, workingDir: String, targetPath: String): Unit = {
|
||||||
|
|
||||||
ModelSupport.entityTypes.asScala
|
ModelSupport.entityTypes.asScala
|
||||||
.filter(e => ModelSupport.isResult(e._1))
|
.filter(e => ModelSupport.isResult(e._1))
|
||||||
|
@ -63,7 +65,7 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String]
|
||||||
|
|
||||||
val matched = spark.read
|
val matched = spark.read
|
||||||
.schema(Encoders.bean(classOf[ORCIDAuthorEnricherResult]).schema)
|
.schema(Encoders.bean(classOf[ORCIDAuthorEnricherResult]).schema)
|
||||||
.parquet(s"${targetPath}/${resultType}_matched")
|
.parquet(s"${workingDir}/${resultType}_matched")
|
||||||
.selectExpr("id", "enriched_author")
|
.selectExpr("id", "enriched_author")
|
||||||
|
|
||||||
spark.read
|
spark.read
|
||||||
|
|
Loading…
Reference in New Issue