forked from D-Net/dnet-hadoop

hostedby patching to work with the updated Crossref contents

This commit is contained in:
parent fb266efbcb
commit db5e18c784
@@ -223,11 +223,13 @@
                     --executor-memory=${sparkExecutorMemory}
                     --executor-cores=${sparkExecutorCores}
                     --driver-memory=${sparkDriverMemory}
+                    --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                     --conf spark.extraListeners=${spark2ExtraListeners}
                     --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                     --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                     --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                     --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+                    --conf spark.sql.shuffle.partitions=15000
                 </spark-opts>
                 <arg>--hostedByMapPath</arg><arg>${hostedByMapPath}</arg>
                 <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
@@ -253,11 +255,13 @@
                     --executor-memory=${sparkExecutorMemory}
                     --executor-cores=${sparkExecutorCores}
                     --driver-memory=${sparkDriverMemory}
+                    --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                     --conf spark.extraListeners=${spark2ExtraListeners}
                     --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                     --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                     --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                     --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+                    --conf spark.sql.shuffle.partitions=15000
                 </spark-opts>
                 <arg>--outputPath</arg><arg>${outputPath}/publication</arg>
                 <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
@@ -278,6 +282,7 @@
                     --executor-memory=${sparkExecutorMemory}
                     --executor-cores=${sparkExecutorCores}
                     --driver-memory=${sparkDriverMemory}
+                    --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                     --conf spark.extraListeners=${spark2ExtraListeners}
                     --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                     --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
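
The same two Spark settings are added to all three <spark-opts> blocks above: spark.executor.memoryOverhead raises the off-heap allowance YARN grants each executor (here set to the same value as the executor heap), and spark.sql.shuffle.partitions=15000 splits the shuffle into many small partitions. Both must be fixed before executors are requested, which is why they sit in the workflow's submit options. A minimal sketch, assuming a standalone SparkSession for local testing (the app name and the literal "4g" are placeholders, not values from this commit):

    import org.apache.spark.sql.SparkSession

    // Sketch only: the two settings from the workflow, applied when building
    // a session. "4g" stands in for ${sparkExecutorMemory}.
    val spark = SparkSession
      .builder()
      .appName("hostedby-patching-local")               // hypothetical name
      .config("spark.executor.memoryOverhead", "4g")    // mirrors the added --conf
      .config("spark.sql.shuffle.partitions", "15000")  // mirrors the added --conf
      .getOrCreate()

In a YARN deployment these belong on spark-submit (or, as here, in the Oozie <spark-opts>), since memoryOverhead shapes the container request itself.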
@@ -25,6 +25,20 @@ object SparkApplyHostedByMapToResult {
           val i = p.getInstance().asScala
           if (i.size == 1) {
             val inst: Instance = i.head
+            patchInstance(p, ei, inst)
+
+          } else if (i.size == 2) {
+            if (i.map(ii => ii.getCollectedfrom.getValue).contains("UnpayWall")) {
+              val inst: Instance = i.filter(ii => "Crossref".equals(ii.getCollectedfrom.getValue)).head
+              patchInstance(p, ei, inst)
+            }
+          }
+        }
+        p
+      })(Encoders.bean(classOf[Publication]))
+  }
+
+  private def patchInstance(p: Publication, ei: EntityInfo, inst: Instance): Unit = {
             inst.getHostedby.setKey(ei.getHostedById)
             inst.getHostedby.setValue(ei.getName)
             if (ei.getOpenAccess) {
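
The new else-if branch encodes a small selection rule: with exactly one instance, patch it as before; with exactly two instances where one was collected from UnpayWall, patch only the Crossref one, leaving the UnpayWall copy untouched. A self-contained sketch of that rule with simplified stand-in types (Inst and selectPatchable are hypothetical, not the project's classes; Option replaces the diff's .head so the sketch stays total):

    // Simplified stand-in for Instance; only the collectedfrom value matters here.
    final case class Inst(collectedFrom: String)

    // One instance -> patch it; exactly two including an UnpayWall copy ->
    // patch the Crossref one; anything else -> leave the publication as is.
    def selectPatchable(instances: List[Inst]): Option[Inst] = instances match {
      case single :: Nil => Some(single)
      case _ :: _ :: Nil if instances.exists(_.collectedFrom == "UnpayWall") =>
        instances.find(_.collectedFrom == "Crossref")
      case _ => None
    }

    // selectPatchable(List(Inst("Crossref"), Inst("UnpayWall")))  == Some(Inst("Crossref"))
    // selectPatchable(List(Inst("a"), Inst("b"), Inst("c")))      == None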
@@ -39,11 +53,6 @@ object SparkApplyHostedByMapToResult {
               inst.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold)
               p.setBestaccessright(OafMapperUtils.createBestAccessRights(p.getInstance()));
             }
-
-          }
-        }
-        p
-      })(Encoders.bean(classOf[Publication]))
   }

   def main(args: Array[String]): Unit = {
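
After a patched instance is marked gold, the publication-level best access right is recomputed from all instances via OafMapperUtils.createBestAccessRights. A hypothetical sketch of that idea, not the real OafMapperUtils logic (the vocabulary and its ordering here are assumptions):

    // Assumed openness ranking; lower is more open.
    val openness = Map("OPEN" -> 0, "EMBARGO" -> 1, "RESTRICTED" -> 2, "CLOSED" -> 3)

    // Pick the most open right across all instances of a result.
    def bestAccessRight(rights: Seq[String]): Option[String] =
      rights.sortBy(r => openness.getOrElse(r, Int.MaxValue)).headOption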