[GRAPH DUMP] Add filtering for relation we do not want to dump. It is based on the relclass
parent
8905a39bf3
commit
58bc3f223a
@ -1,30 +0,0 @@
|
||||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveMetastoreUris</name>
|
||||
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveJdbcUrl</name>
|
||||
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveDbName</name>
|
||||
<value>openaire</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
</configuration>
|
@ -1,431 +0,0 @@
|
||||
<workflow-app name="dump_community_products" xmlns="uri:oozie:workflow:0.5">
|
||||
|
||||
<parameters>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
<description>the source path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>isLookUpUrl</name>
|
||||
<description>the isLookup service endpoint</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>outputPath</name>
|
||||
<description>the output path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>accessToken</name>
|
||||
<description>the access token used for the deposition in Zenodo</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>connectionUrl</name>
|
||||
<description>the connection url for Zenodo</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>metadata</name>
|
||||
<description> the metadata associated to the deposition</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>depositionType</name>
|
||||
<description>one among {new, update, version}</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>conceptRecordId</name>
|
||||
<description>for new version, the id of the record for the old deposition</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveDbName</name>
|
||||
<description>the target hive database name</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveJdbcUrl</name>
|
||||
<description>hive server jdbc url</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveMetastoreUris</name>
|
||||
<description>hive server metastore URIs</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>memory for driver process</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorMemory</name>
|
||||
<description>memory for individual executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorCores</name>
|
||||
<description>number of cores used by single executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozieActionShareLibForSpark2</name>
|
||||
<description>oozie action sharelib for spark 2.*</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2ExtraListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
||||
<description>spark 2.* extra listeners classname</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2SqlQueryExecutionListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
||||
<description>spark 2.* sql query execution listeners classname</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2YarnHistoryServerAddress</name>
|
||||
<description>spark 2.* yarn history server address</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2EventLogDir</name>
|
||||
<description>spark 2.* event log dir location</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<global>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>mapreduce.job.queuename</name>
|
||||
<value>${queueName}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapred.job.queue.name</name>
|
||||
<value>${oozieLauncherQueueName}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>${oozieActionShareLibForSpark2}</value>
|
||||
</property>
|
||||
|
||||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="reset_outputpath"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="reset_outputpath">
|
||||
<fs>
|
||||
<delete path="${outputPath}"/>
|
||||
<mkdir path="${outputPath}"/>
|
||||
</fs>
|
||||
<ok to="save_community_map"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="save_community_map">
|
||||
<java>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap</main-class>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
</java>
|
||||
<ok to="fork_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<fork name="fork_dump">
|
||||
<path start="dump_publication"/>
|
||||
<path start="dump_dataset"/>
|
||||
<path start="dump_orp"/>
|
||||
<path start="dump_software"/>
|
||||
</fork>
|
||||
|
||||
<action name="dump_publication">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump table publication for community related products</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/publication</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
</spark>
|
||||
<ok to="join_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="dump_dataset">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump table dataset for community related products</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/dataset</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
</spark>
|
||||
<ok to="join_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="dump_orp">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump table ORP for community related products</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/otherresearchproduct</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
</spark>
|
||||
<ok to="join_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="dump_software">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump table software for community related products</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/software</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
</spark>
|
||||
<ok to="join_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="join_dump" to="prepareResultProject"/>
|
||||
|
||||
<action name="prepareResultProject">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Prepare association result subset of project info</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||
</spark>
|
||||
<ok to="fork_extendWithProject"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<fork name="fork_extendWithProject">
|
||||
<path start="extend_publication"/>
|
||||
<path start="extend_dataset"/>
|
||||
<path start="extend_orp"/>
|
||||
<path start="extend_software"/>
|
||||
</fork>
|
||||
|
||||
<action name="extend_publication">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Extend dumped publications with information about project</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/publication</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/ext/publication</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||
</spark>
|
||||
<ok to="join_extend"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="extend_dataset">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Extend dumped dataset with information about project</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/ext/dataset</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||
</spark>
|
||||
<ok to="join_extend"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="extend_orp">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Extend dumped ORP with information about project</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/otherresearchproduct</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/ext/orp</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||
</spark>
|
||||
<ok to="join_extend"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="extend_software">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Extend dumped software with information about project</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/software</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/ext/software</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||
</spark>
|
||||
<ok to="join_extend"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="join_extend" to="splitForCommunities"/>
|
||||
|
||||
<action name="splitForCommunities">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Split dumped result for community</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkSplitForCommunity</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/ext</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/split</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
</spark>
|
||||
<ok to="make_archive"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="make_archive">
|
||||
<java>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>
|
||||
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
|
||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/split</arg>
|
||||
</java>
|
||||
<ok to="send_zenodo"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="send_zenodo">
|
||||
<java>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS</main-class>
|
||||
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
|
||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--accessToken</arg><arg>${accessToken}</arg>
|
||||
<arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
|
||||
<arg>--metadata</arg><arg>${metadata}</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
<arg>--conceptRecordId</arg><arg>${conceptRecordId}</arg>
|
||||
<arg>--depositionId</arg><arg>${depositionId}</arg>
|
||||
<arg>--depositionType</arg><arg>${depositionType}</arg>
|
||||
</java>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<end name="End"/>
|
||||
|
||||
</workflow-app>
|
@ -1,30 +0,0 @@
|
||||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveMetastoreUris</name>
|
||||
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveJdbcUrl</name>
|
||||
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveDbName</name>
|
||||
<value>openaire</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
</configuration>
|
@ -1,586 +0,0 @@
|
||||
<workflow-app name="dump_complete_graph" xmlns="uri:oozie:workflow:0.5">
|
||||
|
||||
<parameters>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
<description>the source path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>isLookUpUrl</name>
|
||||
<description>the isLookup service endpoint</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>outputPath</name>
|
||||
<description>the output path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>resultAggregation</name>
|
||||
<description>true if all the result type have to be dumped under result. false otherwise</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>accessToken</name>
|
||||
<description>the access token used for the deposition in Zenodo</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>connectionUrl</name>
|
||||
<description>the connection url for Zenodo</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>metadata</name>
|
||||
<description> the metadata associated to the deposition</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>depositionType</name>
|
||||
<description>the type of deposition we want to perform. "new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided)</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>conceptRecordId</name>
|
||||
<description>for new version, the id of the record for the old deposition</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>depositionId</name>
|
||||
<description>the depositionId of a deposition open that has to be added content</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>organizationCommunityMap</name>
|
||||
<description>the organization community map</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hiveDbName</name>
|
||||
<description>the target hive database name</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveJdbcUrl</name>
|
||||
<description>hive server jdbc url</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveMetastoreUris</name>
|
||||
<description>hive server metastore URIs</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>memory for driver process</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorMemory</name>
|
||||
<description>memory for individual executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorCores</name>
|
||||
<description>number of cores used by single executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozieActionShareLibForSpark2</name>
|
||||
<description>oozie action sharelib for spark 2.*</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2ExtraListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
||||
<description>spark 2.* extra listeners classname</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2SqlQueryExecutionListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
||||
<description>spark 2.* sql query execution listeners classname</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2YarnHistoryServerAddress</name>
|
||||
<description>spark 2.* yarn history server address</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2EventLogDir</name>
|
||||
<description>spark 2.* event log dir location</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<global>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>mapreduce.job.queuename</name>
|
||||
<value>${queueName}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapred.job.queue.name</name>
|
||||
<value>${oozieLauncherQueueName}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>${oozieActionShareLibForSpark2}</value>
|
||||
</property>
|
||||
|
||||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="reset_outputpath"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="reset_outputpath">
|
||||
<fs>
|
||||
<delete path="${outputPath}"/>
|
||||
<mkdir path="${outputPath}"/>
|
||||
</fs>
|
||||
<ok to="save_community_map"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="save_community_map">
|
||||
<java>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap</main-class>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
</java>
|
||||
<ok to="fork_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<fork name="fork_dump">
|
||||
<path start="dump_publication"/>
|
||||
<path start="dump_dataset"/>
|
||||
<path start="dump_orp"/>
|
||||
<path start="dump_software"/>
|
||||
<path start="dump_organization"/>
|
||||
<path start="dump_project"/>
|
||||
<path start="dump_datasource"/>
|
||||
<path start="dump_relation"/>
|
||||
</fork>
|
||||
|
||||
<action name="dump_publication">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump table publication </name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/result/publication</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
</spark>
|
||||
<ok to="join_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="dump_dataset">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump table dataset </name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/result/dataset</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
</spark>
|
||||
<ok to="join_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="dump_orp">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump table ORP </name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/result/otherresearchproduct</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
</spark>
|
||||
<ok to="join_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="dump_software">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump table software </name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/result/software</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
</spark>
|
||||
<ok to="join_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="dump_organization">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump table organization </name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/organization</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/collect/organization</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
</spark>
|
||||
<ok to="join_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="dump_project">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump table project </name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/project</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/collect/project</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
</spark>
|
||||
<ok to="join_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="dump_datasource">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump table datasource </name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/datasource</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/collect/datasource</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
</spark>
|
||||
<ok to="join_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="dump_relation">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump table relation </name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpRelationJob</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/relation/relation</arg>
|
||||
</spark>
|
||||
<ok to="join_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="join_dump" to="fork_context"/>
|
||||
|
||||
<fork name="fork_context">
|
||||
<path start="create_entities_fromcontext"/>
|
||||
<path start="create_relation_fromcontext"/>
|
||||
<path start="create_relation_fromorgs"/>
|
||||
</fork>
|
||||
|
||||
<action name="create_entities_fromcontext">
|
||||
<java>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextEntities</main-class>
|
||||
<arg>--hdfsPath</arg><arg>${workingDir}/collect/communities_infrastructures/communities_infrastructure.json.gz</arg>
|
||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
</java>
|
||||
<ok to="join_context"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="create_relation_fromcontext">
|
||||
<java>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextRelation</main-class>
|
||||
<arg>--hdfsPath</arg><arg>${workingDir}/relation/context</arg>
|
||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
</java>
|
||||
<ok to="join_context"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="create_relation_fromorgs">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump table relation </name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkOrganizationRelation</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/relation/contextOrg</arg>
|
||||
<arg>--organizationCommunityMap</arg><arg>${organizationCommunityMap}</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
</spark>
|
||||
<ok to="join_context"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="join_context" to="fork_extract_relations"/>
|
||||
|
||||
<fork name="fork_extract_relations">
|
||||
<path start="rels_from_pubs"/>
|
||||
<path start="rels_from_dats"/>
|
||||
<path start="rels_from_orp"/>
|
||||
<path start="rels_from_sw"/>
|
||||
</fork>
|
||||
|
||||
<action name="rels_from_pubs">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Extract Relations from publication </name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/relation/publication</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
</spark>
|
||||
<ok to="join_extract_relations"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="rels_from_dats">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump table dataset </name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/relation/dataset</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
</spark>
|
||||
<ok to="join_extract_relations"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="rels_from_orp">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump table ORP </name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/relation/orp</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
</spark>
|
||||
<ok to="join_extract_relations"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="rels_from_sw">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump table software </name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/relation/software</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
</spark>
|
||||
<ok to="join_extract_relations"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="join_extract_relations" to="collect_and_save"/>
|
||||
|
||||
<action name="collect_and_save">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Collect Results and Relations and put them in the right path </name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkCollectAndSave</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/collect</arg>
|
||||
<arg>--resultAggregation</arg><arg>${resultAggregation}</arg>
|
||||
</spark>
|
||||
<ok to="make_archive"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="make_archive">
|
||||
<java>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>
|
||||
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
|
||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/collect</arg>
|
||||
</java>
|
||||
<ok to="send_zenodo"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="send_zenodo">
|
||||
<java>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS</main-class>
|
||||
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
|
||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--accessToken</arg><arg>${accessToken}</arg>
|
||||
<arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
|
||||
<arg>--metadata</arg><arg>${metadata}</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
<arg>--conceptRecordId</arg><arg>${conceptRecordId}</arg>
|
||||
<arg>--depositionType</arg><arg>${depositionType}</arg>
|
||||
<arg>--depositionId</arg><arg>${depositionId}</arg>
|
||||
</java>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<end name="End"/>
|
||||
|
||||
</workflow-app>
|
@ -1,37 +0,0 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"description": {
|
||||
"type": "string",
|
||||
"description": "Description of the research community/research infrastructure"
|
||||
},
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "OpenAIRE id of the research community/research infrastructure"
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The long name of the community"
|
||||
},
|
||||
"originalId": {
|
||||
"type": "string",
|
||||
"description": "The acronym of the community"
|
||||
},
|
||||
"subject": {
|
||||
"description": "Only for research communities: the list of the subjects associated to the research community",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "One of {Research Community, Research infrastructure}"
|
||||
},
|
||||
"zenodo_community": {
|
||||
"type": "string",
|
||||
"description": "The URL of the Zenodo community associated to the Research community/Research infrastructure"
|
||||
}
|
||||
}
|
||||
}
|
@ -1,192 +0,0 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"definitions": {
|
||||
"ControlledField": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"scheme": {
|
||||
"type": "string"
|
||||
},
|
||||
"value": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"accessrights": {
|
||||
"type": "string",
|
||||
"description": "Type of access to the data source, as defined by re3data.org. Possible values: {open, restricted, closed}"
|
||||
},
|
||||
"certificates": {
|
||||
"type": "string",
|
||||
"description": "The certificate, seal or standard the data source complies with. As defined by re3data.org."
|
||||
},
|
||||
"citationguidelineurl": {
|
||||
"type": "string",
|
||||
"description":"The URL of the data source providing information on how to cite its items. As defined by re3data.org."
|
||||
},
|
||||
"contenttypes": {
|
||||
"description": "Types of content in the data source, as defined by OpenDOAR",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"databaseaccessrestriction": {
|
||||
"type": "string",
|
||||
"description": "Access restrinctions to the data source, as defined by re3data.org. One of {feeRequired, registration, other}"
|
||||
},
|
||||
"datasourcetype": {
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/ControlledField"
|
||||
},
|
||||
{
|
||||
"description": "The type of the datasource. See https://api.openaire.eu/vocabularies/dnet:datasource_typologies"
|
||||
}
|
||||
]
|
||||
},
|
||||
"datauploadrestriction": {
|
||||
"type": "string",
|
||||
"description": "Upload restrictions applied by the datasource, as defined by re3data.org. One of {feeRequired, registration, other}"
|
||||
},
|
||||
"dateofvalidation": {
|
||||
"type": "string",
|
||||
"description": "The date of last validation against the OpenAIRE guidelines for the datasource records"
|
||||
},
|
||||
"description": {
|
||||
"type": "string"
|
||||
},
|
||||
"englishname": {
|
||||
"type": "string",
|
||||
"description": "The English name of the datasource"
|
||||
},
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The OpenAIRE id of the data source"
|
||||
},
|
||||
"journal": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"conferencedate": {
|
||||
"type": "string"
|
||||
},
|
||||
"conferenceplace": {
|
||||
"type": "string"
|
||||
},
|
||||
"edition": {
|
||||
"type": "string"
|
||||
},
|
||||
"ep": {
|
||||
"type": "string",
|
||||
"description": "End page"
|
||||
},
|
||||
"iss": {
|
||||
"type": "string",
|
||||
"description": "Issue number"
|
||||
},
|
||||
"issnLinking": {
|
||||
"type": "string"
|
||||
},
|
||||
"issnOnline": {
|
||||
"type": "string"
|
||||
},
|
||||
"issnPrinted": {
|
||||
"type": "string"
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"sp": {
|
||||
"type": "string",
|
||||
"description": "Start page"
|
||||
},
|
||||
"vol": {
|
||||
"type": "string",
|
||||
"description": "Volume"
|
||||
}
|
||||
},
|
||||
"description": "Information about the journal, if this data source is of type Journal."
|
||||
},
|
||||
"languages": {
|
||||
"description": "The languages present in the data source's content, as defined by OpenDOAR.",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"logourl": {
|
||||
"type": "string"
|
||||
},
|
||||
"missionstatementurl": {
|
||||
"type": "string",
|
||||
"description":"The URL of a mission statement describing the designated community of the data source. As defined by re3data.org"
|
||||
},
|
||||
"officialname": {
|
||||
"type": "string",
|
||||
"description": "The official name of the datasource"
|
||||
},
|
||||
"openairecompatibility": {
|
||||
"type": "string",
|
||||
"description": "OpenAIRE guidelines the data source comply with. See also https://guidelines.openaire.eu."
|
||||
},
|
||||
"originalId": {
|
||||
"description": "Original identifiers for the datasource"
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"pid": {
|
||||
"description": "Persistent identifiers of the datasource",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/ControlledField"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"pidsystems": {
|
||||
"type": "string",
|
||||
"description": "The persistent identifier system that is used by the data source. As defined by re3data.org"
|
||||
},
|
||||
"policies": {
|
||||
"description": "Policies of the data source, as defined in OpenDOAR.",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"releaseenddate": {
|
||||
"type": "string",
|
||||
"description": "Date when the data source went offline or stopped ingesting new research data. As defined by re3data.org"
|
||||
},
|
||||
"releasestartdate": {
|
||||
"type": "string",
|
||||
"description": "Releasing date of the data source, as defined by re3data.org"
|
||||
},
|
||||
"subjects": {
|
||||
"description": "List of subjects associated to the datasource",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"uploadrights": {
|
||||
"type": "string",
|
||||
"description": "Type of data upload. As defined by re3data.org: one of {open, restricted,closed}"
|
||||
},
|
||||
"versioning": {
|
||||
"type": "boolean",
|
||||
"description": "As defined by redata.org: 'yes' if the data source supports versioning, 'no' otherwise."
|
||||
},
|
||||
"websiteurl": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
@ -1,57 +0,0 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"alternativenames": {
|
||||
"description": "Alternative names that identify the organisation",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"country": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {
|
||||
"type": "string",
|
||||
"description": "The organisation country code"
|
||||
},
|
||||
"label": {
|
||||
"type": "string",
|
||||
"description": "The organisation country label"
|
||||
}
|
||||
},
|
||||
"description": "The country of the organisation"
|
||||
},
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The OpenAIRE id for the organisation"
|
||||
},
|
||||
"legalname": {
|
||||
"type": "string"
|
||||
},
|
||||
"legalshortname": {
|
||||
"type": "string"
|
||||
},
|
||||
"pid": {
|
||||
"description": "Persistent identifiers for the organisation i.e. isni 0000000090326370",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"scheme": {
|
||||
"type": "string",
|
||||
"description": "The scheme of the identifier (i.e. isni)"
|
||||
},
|
||||
"value": {
|
||||
"type": "string",
|
||||
"description": "the value in the schema (i.e. 0000000090326370)"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"websiteurl": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
@ -1,119 +0,0 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"acronym": {
|
||||
"type": "string"
|
||||
},
|
||||
"callidentifier": {
|
||||
"type": "string"
|
||||
},
|
||||
"code": {
|
||||
"type": "string",
|
||||
"description": "The grant agreement number"
|
||||
},
|
||||
"enddate": {
|
||||
"type": "string"
|
||||
},
|
||||
"funding": {
|
||||
"description": "Funding information for the project",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"funding_stream": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"description": {
|
||||
"type": "string",
|
||||
"description": "Description of the funding stream"
|
||||
},
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "Id of the funding stream"
|
||||
}
|
||||
}
|
||||
},
|
||||
"jurisdiction": {
|
||||
"type": "string",
|
||||
"description": "The jurisdiction of the funder (i.e. EU)"
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The name of the funder (European Commission)"
|
||||
},
|
||||
"shortName": {
|
||||
"type": "string",
|
||||
"description": "The short name of the funder (EC)"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"granted": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"currency": {
|
||||
"type": "string",
|
||||
"description": "The currency of the granted amount (e.g. EUR)"
|
||||
},
|
||||
"fundedamount": {
|
||||
"type": "number",
|
||||
"description": "The funded amount"
|
||||
},
|
||||
"totalcost": {
|
||||
"type": "number",
|
||||
"description": "The total cost of the project"
|
||||
}
|
||||
},
|
||||
"description": "The money granted to the project"
|
||||
},
|
||||
"h2020programme": {
|
||||
"description": "The h2020 programme funding the project",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {
|
||||
"type": "string",
|
||||
"description": "The code of the programme"
|
||||
},
|
||||
"description": {
|
||||
"type": "string",
|
||||
"description": "The description of the programme"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "OpenAIRE id for the project"
|
||||
},
|
||||
"keywords": {
|
||||
"type": "string"
|
||||
},
|
||||
"openaccessmandatefordataset": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"openaccessmandateforpublications": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"startdate": {
|
||||
"type": "string"
|
||||
},
|
||||
"subject": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"summary": {
|
||||
"type": "string"
|
||||
},
|
||||
"title": {
|
||||
"type": "string"
|
||||
},
|
||||
"websiteurl": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
@ -1,60 +0,0 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"definitions": {
|
||||
"Node": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The OpenAIRE id of the entity"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "The type of the entity (i.e. organisation)"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"provenance": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"provenance": {
|
||||
"type": "string",
|
||||
"description": "The reason why OpenAIRE holds the relation "
|
||||
},
|
||||
"trust": {
|
||||
"type": "string",
|
||||
"description": "The trust of the relation in the range of [0,1]. Where greater the number, more the trust. Harvested relationships have typically a high trust (0.9). The trust of inferred relationship is calculated by the inference algorithm that generated them, as described in https://graph.openaire.eu/about#architecture (Enrichment --> Mining)"
|
||||
}
|
||||
}
|
||||
},
|
||||
"reltype": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The semantics of the relation (i.e. isAuthorInstitutionOf). "
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "the type of the relation (i.e. affiliation)"
|
||||
}
|
||||
},
|
||||
"description": "To represent the semantics of a relation between two entities"
|
||||
},
|
||||
"source": {
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/Node"},
|
||||
{"description": "The node source in the relation"}
|
||||
]
|
||||
},
|
||||
"target": {
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/Node"},
|
||||
{"description": "The node target in the relation"}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
@ -1,398 +0,0 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"definitions": {
|
||||
"AccessRight":{
|
||||
"type":"object",
|
||||
"properties":{
|
||||
"code": {
|
||||
"type": "string",
|
||||
"description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
|
||||
},
|
||||
"label": {
|
||||
"type": "string",
|
||||
"description": "Label for the access mode"
|
||||
},
|
||||
"scheme": {
|
||||
"type": "string",
|
||||
"description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
|
||||
}
|
||||
}
|
||||
},
|
||||
"ControlledField": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"scheme": {
|
||||
"type": "string"
|
||||
},
|
||||
"value": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)"
|
||||
},
|
||||
"Provenance": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"provenance": {
|
||||
"type": "string",
|
||||
"description": "The process that produced/provided the information"
|
||||
},
|
||||
"trust": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"description": "Indicates the process that produced (or provided) the information, and the trust associated to the information"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"author": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"fullname": {
|
||||
"type": "string"
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"pid": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/ControlledField"},
|
||||
{"description": "The author's id and scheme. OpenAIRE currently supports 'ORCID'"}
|
||||
]
|
||||
},
|
||||
"provenance": {
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/Provenance"},
|
||||
{"description": "Provenance of author's pid"}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"rank": {
|
||||
"type": "integer"
|
||||
},
|
||||
"surname": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"bestaccessright": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {
|
||||
"type": "string",
|
||||
"description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
|
||||
},
|
||||
"label": {
|
||||
"type": "string",
|
||||
"description": "Label for the access mode"
|
||||
},
|
||||
"scheme": {
|
||||
"type": "string",
|
||||
"description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
|
||||
}
|
||||
},
|
||||
"description": "The openest access right associated to the manifestations of this research results"
|
||||
},
|
||||
"codeRepositoryUrl": {
|
||||
"type": "string",
|
||||
"description": "Only for results with type 'software': the URL to the repository with the source code"
|
||||
},
|
||||
"contactgroup": {
|
||||
"description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"contactperson": {
|
||||
"description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"container": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"conferencedate": {
|
||||
"type": "string"
|
||||
},
|
||||
"conferenceplace": {
|
||||
"type": "string"
|
||||
},
|
||||
"edition": {
|
||||
"type": "string",
|
||||
"description": "Edition of the journal or conference proceeding"
|
||||
},
|
||||
"ep": {
|
||||
"type": "string",
|
||||
"description": "End page"
|
||||
},
|
||||
"iss": {
|
||||
"type": "string",
|
||||
"description": "Journal issue"
|
||||
},
|
||||
"issnLinking": {
|
||||
"type": "string"
|
||||
},
|
||||
"issnOnline": {
|
||||
"type": "string"
|
||||
},
|
||||
"issnPrinted": {
|
||||
"type": "string"
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "Name of the journal or conference"
|
||||
},
|
||||
"sp": {
|
||||
"type": "string",
|
||||
"description": "start page"
|
||||
},
|
||||
"vol": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"description": "Container has information about the conference or journal where the result has been presented or published"
|
||||
},
|
||||
"contributor": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"description": "Description of contributor"
|
||||
}
|
||||
},
|
||||
"country": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {
|
||||
"type": "string",
|
||||
"description": "ISO 3166-1 alpha-2 country code"
|
||||
},
|
||||
"label": {
|
||||
"type": "string"
|
||||
},
|
||||
"provenance": {
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/Provenance"},
|
||||
{"description": "Why this result is associated to the country."}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"coverage": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"dateofcollection": {
|
||||
"type": "string",
|
||||
"description": "When OpenAIRE collected the record the last time"
|
||||
},
|
||||
"description": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"documentationUrl": {
|
||||
"description": "Only for results with type 'software': URL to the software documentation",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"embargoenddate": {
|
||||
"type": "string",
|
||||
"description": "Date when the embargo ends and this result turns Open Access"
|
||||
},
|
||||
"format": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"geolocation": {
|
||||
"description": "Geolocation information",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"box": {
|
||||
"type": "string"
|
||||
},
|
||||
"place": {
|
||||
"type": "string"
|
||||
},
|
||||
"point": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "OpenAIRE Identifier"
|
||||
},
|
||||
"language": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {
|
||||
"type": "string",
|
||||
"description": "alpha-3/ISO 639-2 code of the language"
|
||||
},
|
||||
"label": {
|
||||
"type": "string",
|
||||
"description": "English label"
|
||||
}
|
||||
}
|
||||
},
|
||||
"lastupdatetimestamp": {
|
||||
"type": "integer",
|
||||
"description": "Timestamp of last update of the record in OpenAIRE"
|
||||
},
|
||||
"maintitle": {
|
||||
"type": "string"
|
||||
},
|
||||
"originalId": {
|
||||
"description": "Identifiers of the record at the original sources",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"pid": {
|
||||
"description": "Persistent identifiers of the result",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/ControlledField"},
|
||||
{"description": "scheme: list of available schemes are at https://api.openaire.eu/vocabularies/dnet:pid_types, value: the PID of the result "}
|
||||
]
|
||||
}
|
||||
},
|
||||
"instance":{
|
||||
"type":"array",
|
||||
"items":{
|
||||
"type":"object",
|
||||
"properties":{
|
||||
"accessright":{
|
||||
"allOf":[
|
||||
{
|
||||
"$ref":"#/definitions/AccessRight"
|
||||
},
|
||||
{
|
||||
"description":"The accessright of this materialization of the result"
|
||||
}
|
||||
]
|
||||
},
|
||||
"articleprocessingcharge":{
|
||||
"type":"object",
|
||||
"properties":{
|
||||
"amount":{
|
||||
"type":"string"
|
||||
},
|
||||
"currency":{
|
||||
"type":"string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"license":{
|
||||
"type":"string"
|
||||
},
|
||||
"publicationdate":{
|
||||
"type":"string"
|
||||
},
|
||||
"refereed":{
|
||||
"type":"string"
|
||||
},
|
||||
"type":{
|
||||
"type":"string",
|
||||
"description":"The specific sub-type of this materialization of the result (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)"
|
||||
},
|
||||
"url":{
|
||||
"description":"Description of url",
|
||||
"type":"array",
|
||||
"items":{
|
||||
"type":"string",
|
||||
"description":"urls where it is possible to access the materialization of the result"
|
||||
}
|
||||
}
|
||||
},
|
||||
"description":"One of the materialization for this result"
|
||||
}
|
||||
},
|
||||
"programmingLanguage": {
|
||||
"type": "string",
|
||||
"description": "Only for results with type 'software': the programming language"
|
||||
},
|
||||
"publicationdate": {
|
||||
"type": "string"
|
||||
},
|
||||
"publisher": {
|
||||
"type": "string"
|
||||
},
|
||||
"size": {
|
||||
"type": "string",
|
||||
"description": "Only for results with type 'dataset': the declared size of the dataset"
|
||||
},
|
||||
"source": {
|
||||
"description": "See definition of Dublin Core field dc:source",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"subjects": {
|
||||
"description": "Keywords associated to the result",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"provenance": {
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/Provenance"},
|
||||
{"description": "Why this subject is associated to the result"}
|
||||
]
|
||||
},
|
||||
"subject": {
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/ControlledField"},
|
||||
{"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"subtitle": {
|
||||
"type": "string"
|
||||
},
|
||||
"tool": {
|
||||
"description": "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)"
|
||||
},
|
||||
"version": {
|
||||
"type": "string",
|
||||
"description": "Version of the result"
|
||||
}
|
||||
}
|
||||
}
|
@ -1,30 +0,0 @@
|
||||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveMetastoreUris</name>
|
||||
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveJdbcUrl</name>
|
||||
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveDbName</name>
|
||||
<value>openaire</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
</configuration>
|
@ -1,563 +0,0 @@
|
||||
<workflow-app name="dump_funder_results" xmlns="uri:oozie:workflow:0.5">
|
||||
|
||||
<parameters>
|
||||
<property>
|
||||
<name>upload</name>
|
||||
<value>false</value>
|
||||
<description>true to upload the dump for the funders in Zenodo</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
<description>the source path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>isLookUpUrl</name>
|
||||
<description>the isLookup service endpoint</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>outputPath</name>
|
||||
<description>the output path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>accessToken</name>
|
||||
<description>the access token used for the deposition in Zenodo</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>connectionUrl</name>
|
||||
<description>the connection url for Zenodo</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>metadata</name>
|
||||
<description> the metadata associated to the deposition</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>depositionType</name>
|
||||
<description>the type of deposition we want to perform. "new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided)</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>conceptRecordId</name>
|
||||
<description>for new version, the id of the record for the old deposition</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>depositionId</name>
|
||||
<description>the depositionId of a deposition open that has to be added content</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveDbName</name>
|
||||
<description>the target hive database name</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveJdbcUrl</name>
|
||||
<description>hive server jdbc url</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveMetastoreUris</name>
|
||||
<description>hive server metastore URIs</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>memory for driver process</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorMemory</name>
|
||||
<description>memory for individual executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorCores</name>
|
||||
<description>number of cores used by single executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozieActionShareLibForSpark2</name>
|
||||
<description>oozie action sharelib for spark 2.*</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2ExtraListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
||||
<description>spark 2.* extra listeners classname</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2SqlQueryExecutionListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
||||
<description>spark 2.* sql query execution listeners classname</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2YarnHistoryServerAddress</name>
|
||||
<description>spark 2.* yarn history server address</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2EventLogDir</name>
|
||||
<description>spark 2.* event log dir location</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<global>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>mapreduce.job.queuename</name>
|
||||
<value>${queueName}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapred.job.queue.name</name>
|
||||
<value>${oozieLauncherQueueName}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>${oozieActionShareLibForSpark2}</value>
|
||||
</property>
|
||||
|
||||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="reset_outputpath"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="reset_outputpath">
|
||||
<fs>
|
||||
<delete path="${outputPath}"/>
|
||||
<mkdir path="${outputPath}"/>
|
||||
</fs>
|
||||
<ok to="save_community_map"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="save_community_map">
|
||||
<java>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap</main-class>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
</java>
|
||||
<ok to="fork_result_linked_to_projects"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<fork name="fork_result_linked_to_projects">
|
||||
<path start="select_publication_linked_to_projects"/>
|
||||
<path start="select_dataset_linked_to_projects"/>
|
||||
<path start="select_orp_linked_to_project"/>
|
||||
<path start="select_software_linked_to_projects"/>
|
||||
</fork>
|
||||
|
||||
<action name="select_publication_linked_to_projects">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump funder results </name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/result/publication</arg>
|
||||
<arg>--relationPath</arg><arg>${sourcePath}/relation</arg>
|
||||
</spark>
|
||||
<ok to="join_link"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="select_dataset_linked_to_projects">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump funder results </name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/result/dataset</arg>
|
||||
<arg>--relationPath</arg><arg>${sourcePath}/relation</arg>
|
||||
</spark>
|
||||
<ok to="join_link"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="select_orp_linked_to_project">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump funder results </name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/result/otherresearchproduct</arg>
|
||||
<arg>--relationPath</arg><arg>${sourcePath}/relation</arg>
|
||||
</spark>
|
||||
<ok to="join_link"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="select_software_linked_to_projects">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump funder results </name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/result/software</arg>
|
||||
<arg>--relationPath</arg><arg>${sourcePath}/relation</arg>
|
||||
</spark>
|
||||
<ok to="join_link"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="join_link" to="fork_dump"/>
|
||||
|
||||
<fork name="fork_dump">
|
||||
<path start="dump_publication"/>
|
||||
<path start="dump_dataset"/>
|
||||
<path start="dump_orp"/>
|
||||
<path start="dump_software"/>
|
||||
</fork>
|
||||
|
||||
<action name="dump_publication">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump table publication for community related products</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/result/publication</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/dump/publication</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
<arg>--dumpType</arg><arg>funder</arg>
|
||||
</spark>
|
||||
<ok to="join_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="dump_dataset">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump table dataset for community related products</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/result/dataset</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/dump/dataset</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
<arg>--dumpType</arg><arg>funder</arg>
|
||||
</spark>
|
||||
<ok to="join_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="dump_orp">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump table ORP for community related products</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/result/otherresearchproduct</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/dump/otherresearchproduct</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
<arg>--dumpType</arg><arg>funder</arg>
|
||||
</spark>
|
||||
<ok to="join_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="dump_software">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump table software for community related products</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/result/software</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/dump/software</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
<arg>--dumpType</arg><arg>funder</arg>
|
||||
</spark>
|
||||
<ok to="join_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="join_dump" to="prepareResultProject"/>
|
||||
|
||||
<action name="prepareResultProject">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Prepare association result subset of project info</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||
</spark>
|
||||
<ok to="fork_extendWithProject"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<fork name="fork_extendWithProject">
|
||||
<path start="extend_publication"/>
|
||||
<path start="extend_dataset"/>
|
||||
<path start="extend_orp"/>
|
||||
<path start="extend_software"/>
|
||||
</fork>
|
||||
|
||||
<action name="extend_publication">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Extend dumped publications with information about project</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/dump/publication</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/ext/publication</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||
</spark>
|
||||
<ok to="join_extend"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="extend_dataset">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Extend dumped dataset with information about project</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/dump/dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/ext/dataset</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||
</spark>
|
||||
<ok to="join_extend"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="extend_orp">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Extend dumped ORP with information about project</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/dump/otherresearchproduct</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/ext/orp</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||
</spark>
|
||||
<ok to="join_extend"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="extend_software">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Extend dumped software with information about project</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/dump/software</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/ext/software</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||
</spark>
|
||||
<ok to="join_extend"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<join name="join_extend" to="dump_funder_results"/>
|
||||
|
||||
<action name="dump_funder_results">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Dump funder results </name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkDumpFunderResults</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/ext</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/resultperfunder</arg>
|
||||
<arg>--relationPath</arg><arg>${sourcePath}</arg>
|
||||
</spark>
|
||||
<ok to="make_archive"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="make_archive">
|
||||
<java>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>
|
||||
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
|
||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/resultperfunder</arg>
|
||||
</java>
|
||||
<ok to="should_upload"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<decision name="should_upload">
|
||||
<switch>
|
||||
<case to="send_zenodo">${wf:conf('upload') eq true}</case>
|
||||
<default to="End"/>
|
||||
</switch>
|
||||
</decision>
|
||||
|
||||
<action name="send_zenodo">
|
||||
<java>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS</main-class>
|
||||
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
|
||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--accessToken</arg><arg>${accessToken}</arg>
|
||||
<arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
|
||||
<arg>--metadata</arg><arg>${metadata}</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
<arg>--conceptRecordId</arg><arg>${conceptRecordId}</arg>
|
||||
<arg>--depositionType</arg><arg>${depositionType}</arg>
|
||||
<arg>--depositionId</arg><arg>${depositionId}</arg>
|
||||
</java>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<end name="End"/>
|
||||
|
||||
</workflow-app>
|
@ -1,35 +0,0 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"acronym": {
|
||||
"type": "string",
|
||||
"description": "The acronym of the community"
|
||||
},
|
||||
"description": {
|
||||
"type": "string",
|
||||
"description": "Description of the research community/research infrastructure"
|
||||
},
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "OpenAIRE id of the research community/research infrastructure"
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The long name of the community"
|
||||
},
|
||||
"subject": {
|
||||
"description": "Only for research communities: the list of the subjects associated to the research community",
|
||||
"type": "array",
|
||||
"items": {"type": "string"}
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "One of {Research Community, Research infrastructure}"
|
||||
},
|
||||
"zenodo_community": {
|
||||
"type": "string",
|
||||
"description": "The URL of the Zenodo community associated to the Research community/Research infrastructure"
|
||||
}
|
||||
}
|
||||
}
|
@ -1,192 +0,0 @@
|
||||
{
|
||||
"$schema":"http://json-schema.org/draft-07/schema#",
|
||||
"definitions": {
|
||||
"ControlledField": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"scheme": {
|
||||
"type": "string"
|
||||
},
|
||||
"value": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)"
|
||||
}
|
||||
},
|
||||
"type":"object",
|
||||
"properties": {
|
||||
"accessrights": {
|
||||
"type": "string",
|
||||
"description": "Type of access to the data source, as defined by re3data.org. Possible values: {open, restricted, closed}"
|
||||
},
|
||||
"certificates": {
|
||||
"type": "string",
|
||||
"description": "The certificate, seal or standard the data source complies with. As defined by re3data.org."
|
||||
},
|
||||
"citationguidelineurl": {
|
||||
"type": "string",
|
||||
"description":"The URL of the data source providing information on how to cite its items. As defined by re3data.org."
|
||||
},
|
||||
"contenttypes": {
|
||||
"description": "Types of content in the data source, as defined by OpenDOAR",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"databaseaccessrestriction": {
|
||||
"type": "string",
|
||||
"description": "Access restrinctions to the data source, as defined by re3data.org. One of {feeRequired, registration, other}"
|
||||
},
|
||||
"datasourcetype": {
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/ControlledField"
|
||||
},
|
||||
{
|
||||
"description": "The type of the datasource. See https://api.openaire.eu/vocabularies/dnet:datasource_typologies"
|
||||
}
|
||||
]
|
||||
},
|
||||
"datauploadrestriction": {
|
||||
"type": "string",
|
||||
"description": "Upload restrictions applied by the datasource, as defined by re3data.org. One of {feeRequired, registration, other}"
|
||||
},
|
||||
"dateofvalidation": {
|
||||
"type": "string",
|
||||
"description": "The date of last validation against the OpenAIRE guidelines for the datasource records"
|
||||
},
|
||||
"description": {
|
||||
"type": "string"
|
||||
},
|
||||
"englishname": {
|
||||
"type": "string",
|
||||
"description": "The English name of the datasource"
|
||||
},
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The OpenAIRE id of the data source"
|
||||
},
|
||||
"journal": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"conferencedate": {
|
||||
"type": "string"
|
||||
},
|
||||
"conferenceplace": {
|
||||
"type": "string"
|
||||
},
|
||||
"edition": {
|
||||
"type": "string"
|
||||
},
|
||||
"ep": {
|
||||
"type": "string",
|
||||
"description": "End page"
|
||||
},
|
||||
"iss": {
|
||||
"type": "string",
|
||||
"description": "Issue number"
|
||||
},
|
||||
"issnLinking": {
|
||||
"type": "string"
|
||||
},
|
||||
"issnOnline": {
|
||||
"type": "string"
|
||||
},
|
||||
"issnPrinted": {
|
||||
"type": "string"
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"sp": {
|
||||
"type": "string",
|
||||
"description": "Start page"
|
||||
},
|
||||
"vol": {
|
||||
"type": "string",
|
||||
"description": "Volume"
|
||||
}
|
||||
},
|
||||
"description": "Information about the journal, if this data source is of type Journal."
|
||||
},
|
||||
"languages": {
|
||||
"description": "The languages present in the data source's content, as defined by OpenDOAR.",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"logourl": {
|
||||
"type": "string"
|
||||
},
|
||||
"missionstatementurl": {
|
||||
"type": "string",
|
||||
"description":"The URL of a mission statement describing the designated community of the data source. As defined by re3data.org"
|
||||
},
|
||||
"officialname": {
|
||||
"type": "string",
|
||||
"description": "The official name of the datasource"
|
||||
},
|
||||
"openairecompatibility": {
|
||||
"type": "string",
|
||||
"description": "OpenAIRE guidelines the data source comply with. See also https://guidelines.openaire.eu."
|
||||
},
|
||||
"originalId": {
|
||||
"description": "Original identifiers for the datasource"
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"pid": {
|
||||
"description": "Persistent identifiers of the datasource",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/ControlledField"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"pidsystems": {
|
||||
"type": "string",
|
||||
"description": "The persistent identifier system that is used by the data source. As defined by re3data.org"
|
||||
},
|
||||
"policies": {
|
||||
"description": "Policies of the data source, as defined in OpenDOAR.",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"releaseenddate": {
|
||||
"type": "string",
|
||||
"description": "Date when the data source went offline or stopped ingesting new research data. As defined by re3data.org"
|
||||
},
|
||||
"releasestartdate": {
|
||||
"type": "string",
|
||||
"description": "Releasing date of the data source, as defined by re3data.org"
|
||||
},
|
||||
"subjects": {
|
||||
"description": "List of subjects associated to the datasource",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"uploadrights": {
|
||||
"type": "string",
|
||||
"description": "Type of data upload. As defined by re3data.org: one of {open, restricted,closed}"
|
||||
},
|
||||
"versioning": {
|
||||
"type": "boolean",
|
||||
"description": "As defined by redata.org: 'yes' if the data source supports versioning, 'no' otherwise."
|
||||
},
|
||||
"websiteurl": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
@ -1,57 +0,0 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"alternativenames": {
|
||||
"description": "Alternative names that identify the organisation",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"country": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {
|
||||
"type": "string",
|
||||
"description": "The organisation country code"
|
||||
},
|
||||
"label": {
|
||||
"type": "string",
|
||||
"description": "The organisation country label"
|
||||
}
|
||||
},
|
||||
"description": "The country of the organisation"
|
||||
},
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The OpenAIRE id for the organisation"
|
||||
},
|
||||
"legalname": {
|
||||
"type": "string"
|
||||
},
|
||||
"legalshortname": {
|
||||
"type": "string"
|
||||
},
|
||||
"pid": {
|
||||
"description": "Persistent identifiers for the organisation i.e. isni 0000000090326370",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"scheme": {
|
||||
"type": "string",
|
||||
"description": "The scheme of the identifier (i.e. isni)"
|
||||
},
|
||||
"value": {
|
||||
"type": "string",
|
||||
"description": "the value in the schema (i.e. 0000000090326370)"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"websiteurl": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
@ -1,119 +0,0 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"acronym": {
|
||||
"type": "string"
|
||||
},
|
||||
"callidentifier": {
|
||||
"type": "string"
|
||||
},
|
||||
"code": {
|
||||
"type": "string",
|
||||
"description": "The grant agreement number"
|
||||
},
|
||||
"enddate": {
|
||||
"type": "string"
|
||||
},
|
||||
"funding": {
|
||||
"description": "Funding information for the project",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"funding_stream": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"description": {
|
||||
"type": "string",
|
||||
"description": "Description of the funding stream"
|
||||
},
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "Id of the funding stream"
|
||||
}
|
||||
}
|
||||
},
|
||||
"jurisdiction": {
|
||||
"type": "string",
|
||||
"description": "The jurisdiction of the funder (i.e. EU)"
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The name of the funder (European Commission)"
|
||||
},
|
||||
"shortName": {
|
||||
"type": "string",
|
||||
"description": "The short name of the funder (EC)"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"granted": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"currency": {
|
||||
"type": "string",
|
||||
"description": "The currency of the granted amount (e.g. EUR)"
|
||||
},
|
||||
"fundedamount": {
|
||||
"type": "number",
|
||||
"description": "The funded amount"
|
||||
},
|
||||
"totalcost": {
|
||||
"type": "number",
|
||||
"description": "The total cost of the project"
|
||||
}
|
||||
},
|
||||
"description": "The money granted to the project"
|
||||
},
|
||||
"h2020programme": {
|
||||
"description": "The h2020 programme funding the project",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {
|
||||
"type": "string",
|
||||
"description": "The code of the programme"
|
||||
},
|
||||
"description": {
|
||||
"type": "string",
|
||||
"description": "The description of the programme"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "OpenAIRE id for the project"
|
||||
},
|
||||
"keywords": {
|
||||
"type": "string"
|
||||
},
|
||||
"openaccessmandatefordataset": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"openaccessmandateforpublications": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"startdate": {
|
||||
"type": "string"
|
||||
},
|
||||
"subject": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"summary": {
|
||||
"type": "string"
|
||||
},
|
||||
"title": {
|
||||
"type": "string"
|
||||
},
|
||||
"websiteurl": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
@ -1,68 +0,0 @@
|
||||
{
|
||||
"$schema":"http://json-schema.org/draft-07/schema#",
|
||||
"definitions": {
|
||||
"Node": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The OpenAIRE id of the entity"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "The type of the entity (i.e. organisation)"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"type":"object",
|
||||
"properties": {
|
||||
"provenance": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"provenance": {
|
||||
"type": "string",
|
||||
"description": "The reason why OpenAIRE holds the relation "
|
||||
},
|
||||
"trust": {
|
||||
"type": "string",
|
||||
"description": "The trust of the relation in the range of [0,1]. Where greater the number, more the trust. Harvested relationships have typically a high trust (0.9). The trust of inferred relationship is calculated by the inference algorithm that generated them, as described in https://graph.openaire.eu/about#architecture (Enrichment --> Mining)"
|
||||
}
|
||||
}
|
||||
},
|
||||
"reltype": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The semantics of the relation (i.e. isAuthorInstitutionOf). "
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "the type of the relation (i.e. affiliation)"
|
||||
}
|
||||
},
|
||||
"description": "To represent the semantics of a relation between two entities"
|
||||
},
|
||||
"source": {
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/Node"},
|
||||
{"description": "The node source in the relation"}
|
||||
]
|
||||
},
|
||||
"target": {
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/Node"},
|
||||
{"description": "The node target in the relation"}
|
||||
]
|
||||
},
|
||||
"validated":{
|
||||
"type":"boolean",
|
||||
"description":"True if the relation is related to a project and it has been collected from an authoritative source (i.e. the funder)"
|
||||
},
|
||||
"validationDate":{
|
||||
"type":"string",
|
||||
"description":"The date when the relation was collected from OpenAIRE"
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue