1
0
Fork 0

Scholexplorer Datasource Aggregation:

- Added collectedfrom in the inverse relation generated
Relation resolution:
- increased number of partitions in workflow.xml
- using classid instead of classname to build the pid-dnetId mapping
This commit is contained in:
Sandro La Bruzzo 2021-10-26 17:51:20 +02:00
parent aafdffa6b3
commit 4acfa8fa2e
3 changed files with 9 additions and 3 deletions

View File

@ -34,6 +34,12 @@ object CollectionUtils {
inverse.setRelType(currentRel.getRelType)
inverse.setSubRelType(currentRel.getSubReltype)
inverse.setRelClass(currentRel.getInverseRelClass)
inverse.setCollectedfrom(r.getCollectedfrom)
inverse.setDataInfo(r.getDataInfo)
inverse.setProperties(r.getProperties)
inverse.setLastupdatetimestamp(r.getLastupdatetimestamp)
inverse.setValidated(r.getValidated)
inverse.setValidationDate(r.getValidationDate)
return List(r, inverse)
}
}

View File

@ -104,14 +104,14 @@ object SparkResolveRelation {
JObject(pids) <- json \\ "instance" \ "pid"
JField("value", JString(pidValue)) <- pids
JField("qualifier", JObject(qualifier)) <- pids
JField("classname", JString(pidType)) <- qualifier
JField("classid", JString(pidType)) <- qualifier
} yield (pidValue, pidType)
val alternateIds: List[(String, String)] = for {
JObject(pids) <- json \\ "alternateIdentifier"
JField("value", JString(pidValue)) <- pids
JField("qualifier", JObject(qualifier)) <- pids
JField("classname", JString(pidType)) <- qualifier
JField("classid", JString(pidType)) <- qualifier
} yield (pidValue, pidType)
(id, result ::: alternateIds)

View File

@ -24,7 +24,7 @@
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.shuffle.partitions=3000
--conf spark.sql.shuffle.partitions=8000
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}