Scholexplorer Datasource Aggregation:

- Added collectedfrom in the inverse relation generated
Relation resolution:
- increased number of partitions in workflow.xml
- using classid instead of classname to build the pid-dnetId mapping
pull/157/head
Sandro La Bruzzo 3 years ago
parent aafdffa6b3
commit 4acfa8fa2e

@ -34,6 +34,12 @@ object CollectionUtils {
inverse.setRelType(currentRel.getRelType)
inverse.setSubRelType(currentRel.getSubReltype)
inverse.setRelClass(currentRel.getInverseRelClass)
inverse.setCollectedfrom(r.getCollectedfrom)
inverse.setDataInfo(r.getDataInfo)
inverse.setProperties(r.getProperties)
inverse.setLastupdatetimestamp(r.getLastupdatetimestamp)
inverse.setValidated(r.getValidated)
inverse.setValidationDate(r.getValidationDate)
return List(r, inverse)
}
}

@ -104,14 +104,14 @@ object SparkResolveRelation {
JObject(pids) <- json \\ "instance" \ "pid"
JField("value", JString(pidValue)) <- pids
JField("qualifier", JObject(qualifier)) <- pids
JField("classname", JString(pidType)) <- qualifier
JField("classid", JString(pidType)) <- qualifier
} yield (pidValue, pidType)
val alternateIds: List[(String, String)] = for {
JObject(pids) <- json \\ "alternateIdentifier"
JField("value", JString(pidValue)) <- pids
JField("qualifier", JObject(qualifier)) <- pids
JField("classname", JString(pidType)) <- qualifier
JField("classid", JString(pidType)) <- qualifier
} yield (pidValue, pidType)
(id, result ::: alternateIds)

@ -24,7 +24,7 @@
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.shuffle.partitions=3000
--conf spark.sql.shuffle.partitions=8000
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}

Loading…
Cancel
Save