forked from D-Net/dnet-hadoop

Scholexplorer Datasource Aggregation:
- Added collectedfrom to the generated inverse relation

Relation resolution:
- increased the number of partitions in workflow.xml
- using classid instead of classname to build the pid-dnetId mapping
commit 4acfa8fa2e (parent aafdffa6b3)
@@ -34,6 +34,12 @@ object CollectionUtils {
       inverse.setRelType(currentRel.getRelType)
       inverse.setSubRelType(currentRel.getSubReltype)
       inverse.setRelClass(currentRel.getInverseRelClass)
+      inverse.setCollectedfrom(r.getCollectedfrom)
+      inverse.setDataInfo(r.getDataInfo)
+      inverse.setProperties(r.getProperties)
+      inverse.setLastupdatetimestamp(r.getLastupdatetimestamp)
+      inverse.setValidated(r.getValidated)
+      inverse.setValidationDate(r.getValidationDate)
       return List(r, inverse)
     }
 }
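For context, a minimal sketch of the pattern this hunk extends: the inverse edge swaps the endpoints and now also carries the provenance fields of the original relation. The names InverseRelationSketch, RelationDescriptor and invRelation are hypothetical, as is the endpoint swap; only the setter calls shown in the diff come from the source.

import eu.dnetlib.dhp.schema.oaf.Relation

object InverseRelationSketch {

  // Hypothetical descriptor of a relation class and its inverse; the real
  // code resolves this from the relation vocabulary (assumption, not the
  // repository's actual API).
  case class RelationDescriptor(relType: String, subRelType: String, inverseRelClass: String)

  // Sketch of the surrounding method: build the inverse edge of r by
  // swapping source and target (assumption) and, as of this commit, copying
  // the provenance fields (collectedfrom, dataInfo, ...) from the original
  // relation instead of leaving them empty.
  def invRelation(r: Relation, currentRel: RelationDescriptor): List[Relation] = {
    val inverse = new Relation
    inverse.setSource(r.getTarget)
    inverse.setTarget(r.getSource)
    inverse.setRelType(currentRel.relType)
    inverse.setSubRelType(currentRel.subRelType)
    inverse.setRelClass(currentRel.inverseRelClass)
    inverse.setCollectedfrom(r.getCollectedfrom)
    inverse.setDataInfo(r.getDataInfo)
    inverse.setProperties(r.getProperties)
    inverse.setLastupdatetimestamp(r.getLastupdatetimestamp)
    inverse.setValidated(r.getValidated)
    inverse.setValidationDate(r.getValidationDate)
    List(r, inverse)
  }
}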
@@ -104,14 +104,14 @@ object SparkResolveRelation {
       JObject(pids) <- json \\ "instance" \ "pid"
       JField("value", JString(pidValue)) <- pids
       JField("qualifier", JObject(qualifier)) <- pids
-      JField("classname", JString(pidType)) <- qualifier
+      JField("classid", JString(pidType)) <- qualifier
     } yield (pidValue, pidType)
 
     val alternateIds: List[(String, String)] = for {
       JObject(pids) <- json \\ "alternateIdentifier"
       JField("value", JString(pidValue)) <- pids
       JField("qualifier", JObject(qualifier)) <- pids
-      JField("classname", JString(pidType)) <- qualifier
+      JField("classid", JString(pidType)) <- qualifier
     } yield (pidValue, pidType)
 
     (id, result ::: alternateIds)
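The change keys the pid-dnetId mapping on the qualifier's stable classid rather than its human-readable classname. A self-contained json4s sketch of the same extraction pattern; the input record is made up, but its field layout mirrors what the hunk traverses.

import org.json4s._
import org.json4s.jackson.JsonMethods.parse

object PidMappingSketch {
  def main(args: Array[String]): Unit = {
    // Made-up record with the same shape the hunk parses (hypothetical values)
    val input =
      """{"id":"50|doi_________::abc",
        |"instance":[{"pid":[{"value":"10.1000/xyz",
        |"qualifier":{"classid":"doi","classname":"Digital Object Identifier"}}]}]}""".stripMargin

    val json = parse(input)

    // Same for-comprehension as the hunk: the pid type now comes from the
    // qualifier's classid ("doi"), not its classname ("Digital Object Identifier")
    val result: List[(String, String)] = for {
      JObject(pids) <- json \\ "instance" \ "pid"
      JField("value", JString(pidValue)) <- pids
      JField("qualifier", JObject(qualifier)) <- pids
      JField("classid", JString(pidType)) <- qualifier
    } yield (pidValue, pidType)

    println(result) // List((10.1000/xyz,doi))
  }
}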
@@ -24,7 +24,7 @@
       --executor-cores=${sparkExecutorCores}
       --driver-memory=${sparkDriverMemory}
       --conf spark.extraListeners=${spark2ExtraListeners}
-      --conf spark.sql.shuffle.partitions=3000
+      --conf spark.sql.shuffle.partitions=8000
       --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
       --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
       --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
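Raising spark.sql.shuffle.partitions from 3000 to 8000 spreads the shuffle of the relation-resolution joins across more, smaller tasks. A sketch of the equivalent programmatic setting; the workflow actually passes it as a spark-submit --conf as above, and the appName/master here are illustrative only.

import org.apache.spark.sql.SparkSession

object ShufflePartitionsSketch {
  def main(args: Array[String]): Unit = {
    // Same tuning applied in code: 8000 matches the new workflow.xml value.
    // More shuffle partitions mean smaller per-task inputs on large joins,
    // at the cost of extra task-scheduling overhead.
    val spark = SparkSession.builder()
      .appName("SparkResolveRelation")
      .master("local[*]")
      .config("spark.sql.shuffle.partitions", "8000")
      .getOrCreate()

    println(spark.conf.get("spark.sql.shuffle.partitions")) // 8000
    spark.stop()
  }
}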