forked from D-Net/dnet-hadoop
Scholexplorer Datasource Aggregation:
- Added collectedfrom in the inverse generated relation

Relation resolution:
- Increased number of partitions in workflow.xml
- Using classid instead of classname to build the pid-dnetId mapping
This commit is contained in:
parent
aafdffa6b3
commit
4acfa8fa2e
|
@ -34,6 +34,12 @@ object CollectionUtils {
|
||||||
inverse.setRelType(currentRel.getRelType)
|
inverse.setRelType(currentRel.getRelType)
|
||||||
inverse.setSubRelType(currentRel.getSubReltype)
|
inverse.setSubRelType(currentRel.getSubReltype)
|
||||||
inverse.setRelClass(currentRel.getInverseRelClass)
|
inverse.setRelClass(currentRel.getInverseRelClass)
|
||||||
|
inverse.setCollectedfrom(r.getCollectedfrom)
|
||||||
|
inverse.setDataInfo(r.getDataInfo)
|
||||||
|
inverse.setProperties(r.getProperties)
|
||||||
|
inverse.setLastupdatetimestamp(r.getLastupdatetimestamp)
|
||||||
|
inverse.setValidated(r.getValidated)
|
||||||
|
inverse.setValidationDate(r.getValidationDate)
|
||||||
return List(r, inverse)
|
return List(r, inverse)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -104,14 +104,14 @@ object SparkResolveRelation {
|
||||||
JObject(pids) <- json \\ "instance" \ "pid"
|
JObject(pids) <- json \\ "instance" \ "pid"
|
||||||
JField("value", JString(pidValue)) <- pids
|
JField("value", JString(pidValue)) <- pids
|
||||||
JField("qualifier", JObject(qualifier)) <- pids
|
JField("qualifier", JObject(qualifier)) <- pids
|
||||||
JField("classname", JString(pidType)) <- qualifier
|
JField("classid", JString(pidType)) <- qualifier
|
||||||
} yield (pidValue, pidType)
|
} yield (pidValue, pidType)
|
||||||
|
|
||||||
val alternateIds: List[(String, String)] = for {
|
val alternateIds: List[(String, String)] = for {
|
||||||
JObject(pids) <- json \\ "alternateIdentifier"
|
JObject(pids) <- json \\ "alternateIdentifier"
|
||||||
JField("value", JString(pidValue)) <- pids
|
JField("value", JString(pidValue)) <- pids
|
||||||
JField("qualifier", JObject(qualifier)) <- pids
|
JField("qualifier", JObject(qualifier)) <- pids
|
||||||
JField("classname", JString(pidType)) <- qualifier
|
JField("classid", JString(pidType)) <- qualifier
|
||||||
} yield (pidValue, pidType)
|
} yield (pidValue, pidType)
|
||||||
|
|
||||||
(id, result ::: alternateIds)
|
(id, result ::: alternateIds)
|
||||||
|
|
|
@ -24,7 +24,7 @@
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
--driver-memory=${sparkDriverMemory}
|
--driver-memory=${sparkDriverMemory}
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
--conf spark.sql.shuffle.partitions=3000
|
--conf spark.sql.shuffle.partitions=8000
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
|
Loading…
Reference in New Issue