merge upstream
commit
896919e735
@ -0,0 +1,18 @@
|
||||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
</configuration>
|
@ -0,0 +1,114 @@
|
||||
<workflow-app name="create broker events" xmlns="uri:oozie:workflow:0.5">
|
||||
|
||||
<parameters>
|
||||
<property>
|
||||
<name>outputDir</name>
|
||||
<description>the path where the generated data will be stored</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>brokerApiBaseUrl</name>
|
||||
<description>the url of the broker service api</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>brokerDbUrl</name>
|
||||
<description>the url of the broker database</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>brokerDbUser</name>
|
||||
<description>the user of the broker database</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>brokerDbPassword</name>
|
||||
<description>the password of the broker database</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>memory for driver process</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorMemory</name>
|
||||
<description>memory for individual executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorCores</name>
|
||||
<description>number of cores used by single executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozieActionShareLibForSpark2</name>
|
||||
<description>oozie action sharelib for spark 2.*</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2ExtraListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
||||
<description>spark 2.* extra listeners classname</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2SqlQueryExecutionListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
||||
<description>spark 2.* sql query execution listeners classname</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2YarnHistoryServerAddress</name>
|
||||
<description>spark 2.* yarn history server address</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2EventLogDir</name>
|
||||
<description>spark 2.* event log dir location</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<global>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>mapreduce.job.queuename</name>
|
||||
<value>${queueName}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapred.job.queue.name</name>
|
||||
<value>${oozieLauncherQueueName}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>${oozieActionShareLibForSpark2}</value>
|
||||
</property>
|
||||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="stats"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="stats">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>GenerateStatsJob</name>
|
||||
<class>eu.dnetlib.dhp.broker.oa.GenerateStatsJob</class>
|
||||
<jar>dhp-broker-events-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--outputDir</arg><arg>${outputDir}</arg>
|
||||
<arg>--dbUrl</arg><arg>${brokerDbUrl}</arg>
|
||||
<arg>--dbUser</arg><arg>${brokerDbUser}</arg>
|
||||
<arg>--dbPassword</arg><arg>${brokerDbPassword}</arg>
|
||||
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<end name="End"/>
|
||||
|
||||
</workflow-app>
|
@ -1,4 +1,5 @@
|
||||
<rel inferred="$inferred$" trust="$trust$" inferenceprovenance="$inferenceprovenance$" provenanceaction="$provenanceaction$">
|
||||
<to class="$class$" scheme="$scheme$" type="$type$">$objIdentifier$</to>
|
||||
$if(validated)$<validated date="$validationdate$"/>$else$$endif$
|
||||
<to class="$class$" scheme="$scheme$" type="$type$">$objIdentifier$</to>
|
||||
$metadata:{ it | $it$ }$
|
||||
</rel>
|
@ -0,0 +1,109 @@
|
||||
{
|
||||
"id": "40|corda__h2020::79a0e16c122c9a18eb60e4a5e64b620d",
|
||||
"originalId": [],
|
||||
"pid": [],
|
||||
"dateofcollection": "2020-01-01",
|
||||
"dateoftransformation": "2020-01-01",
|
||||
"extraInfo": [],
|
||||
"oaiprovenance": null,
|
||||
"websiteurl": {
|
||||
"value": "https://web.site",
|
||||
"datainfo": null
|
||||
},
|
||||
"code": {
|
||||
"value": "79a0e",
|
||||
"datainfo": null
|
||||
},
|
||||
"acronym": {
|
||||
"value": "79a0e_acronym",
|
||||
"datainfo": null
|
||||
},
|
||||
"title": {
|
||||
"value": "79a0e_title",
|
||||
"datainfo": null
|
||||
},
|
||||
"startdate": {
|
||||
"value": "2019-02-01",
|
||||
"datainfo": null
|
||||
},
|
||||
"enddate": {
|
||||
"value": "2021-01-09",
|
||||
"datainfo": null
|
||||
},
|
||||
"callidentifier": {
|
||||
"value": "79a0e_callID",
|
||||
"datainfo": null
|
||||
},
|
||||
"keywords": {
|
||||
"value": "",
|
||||
"datainfo": null
|
||||
},
|
||||
"duration": {
|
||||
"value": "",
|
||||
"datainfo": null
|
||||
},
|
||||
"ecsc39": {
|
||||
"value": "true",
|
||||
"datainfo": null
|
||||
},
|
||||
"oamandatepublications": {
|
||||
"value": "true",
|
||||
"datainfo": null
|
||||
},
|
||||
"ecarticle29_3": {
|
||||
"value": "false",
|
||||
"datainfo": null
|
||||
},
|
||||
"optional1": {
|
||||
"value": "",
|
||||
"datainfo": null
|
||||
},
|
||||
"optional2": {
|
||||
"value": "",
|
||||
"datainfo": null
|
||||
},
|
||||
"jsonextrainfo":{
|
||||
"value": "",
|
||||
"datainfo": null
|
||||
},
|
||||
"contactfullname":{
|
||||
"value": "",
|
||||
"datainfo": null
|
||||
},
|
||||
"contactfax": {
|
||||
"value": "",
|
||||
"datainfo": null
|
||||
},
|
||||
"contactphone": {
|
||||
"value": "",
|
||||
"datainfo": null
|
||||
},
|
||||
"contactemail": {
|
||||
"value": "",
|
||||
"datainfo": null
|
||||
},
|
||||
"summary": {
|
||||
"value": "79a0e_description",
|
||||
"datainfo": null
|
||||
},
|
||||
"currency": {
|
||||
"value": "EUR",
|
||||
"datainfo": null
|
||||
},
|
||||
"totalcost": 120000,
|
||||
"fundedamount": 18000,
|
||||
"h2020topiccode": "",
|
||||
"h2020topicdescription": "",
|
||||
"h2020classification": [],
|
||||
"subjects": [
|
||||
{
|
||||
"value": "",
|
||||
"qualifier": null,
|
||||
"datainfo": null
|
||||
}
|
||||
],
|
||||
"fundingtree": []
|
||||
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,31 @@
|
||||
{
|
||||
"collectedfrom": [
|
||||
{
|
||||
"key": "10|opendoar____::eccbc87e4b5ce2fe28308fd9f2a7baf3",
|
||||
"value": "AMS Acta",
|
||||
"dataInfo": null
|
||||
}
|
||||
],
|
||||
"dataInfo": {
|
||||
"invisible": false,
|
||||
"inferred": false,
|
||||
"deletedbyinference": false,
|
||||
"trust": "0.9",
|
||||
"inferenceprovenance": "",
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:repository",
|
||||
"classname": "sysimport:crosswalk:repository",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
}
|
||||
},
|
||||
"lastupdatetimestamp": 1606898557407,
|
||||
"relType": "resultProject",
|
||||
"subRelType": "outcome",
|
||||
"relClass": "isProducedBy",
|
||||
"source": "50|CSC_________::0000ec4dd9df012feaafa77e71a0fb4c",
|
||||
"target": "40|corda__h2020::79a0e16c122c9a18eb60e4a5e64b620d",
|
||||
"validated": null,
|
||||
"validationDate": null,
|
||||
"properties": []
|
||||
}
|
@ -0,0 +1,31 @@
|
||||
{
|
||||
"collectedfrom": [
|
||||
{
|
||||
"key": "10|opendoar____::eccbc87e4b5ce2fe28308fd9f2a7baf3",
|
||||
"value": "AMS Acta",
|
||||
"dataInfo": null
|
||||
}
|
||||
],
|
||||
"dataInfo": {
|
||||
"invisible": false,
|
||||
"inferred": false,
|
||||
"deletedbyinference": false,
|
||||
"trust": "0.9",
|
||||
"inferenceprovenance": "",
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:repository",
|
||||
"classname": "sysimport:crosswalk:repository",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
}
|
||||
},
|
||||
"lastupdatetimestamp": 1606898557407,
|
||||
"relType": "resultProject",
|
||||
"subRelType": "outcome",
|
||||
"relClass": "isProducedBy",
|
||||
"source": "50|CSC_________::0000ec4dd9df012feaafa77e71a0fb4c",
|
||||
"target": "40|corda__h2020::79a0e16c122c9a18eb60e4a5e64b620d",
|
||||
"validated": true,
|
||||
"validationDate": "2021-01-01",
|
||||
"properties": []
|
||||
}
|
Loading…
Reference in New Issue