forked from D-Net/dnet-hadoop
first version of dataset successful generated from orcid dump 2020
parent
9818e74a70
commit
6bc7dbeca7
@ -0,0 +1,7 @@
|
||||
[
|
||||
{"paramName":"n", "paramLongName":"hdfsServerUri", "paramDescription": "the server uri", "paramRequired": true},
|
||||
{"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the default work path", "paramRequired": true},
|
||||
{"paramName":"f", "paramLongName":"activitiesFileNameTarGz", "paramDescription": "the name of the activities orcid file", "paramRequired": true},
|
||||
{"paramName":"ow", "paramLongName":"outputWorksPath", "paramDescription": "the relative folder of the sequencial file to write", "paramRequired": true},
|
||||
{"paramName":"oew", "paramLongName":"outputEnrichedWorksPath", "paramDescription": "the relative folder of the sequencial file to write the data", "paramRequired": true}
|
||||
]
|
@ -1,42 +0,0 @@
|
||||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>hadoop-rm3.garr-pa1.d4science.org:8032</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://hadoop-rm1.garr-pa1.d4science.org:8020</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hive_metastore_uris</name>
|
||||
<value>thrift://hadoop-edge2.garr-pa1.d4science.org:9083</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2YarnHistoryServerAddress</name>
|
||||
<value>http://hadoop-edge1.garr-pa1.d4science.org:18089/</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2EventLogDir</name>
|
||||
<value>/user/spark/spark2ApplicationHistory</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2ExtraListeners</name>
|
||||
<value>"com.cloudera.spark.lineage.NavigatorAppListener"</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2SqlQueryExecutionListeners</name>
|
||||
<value>"com.cloudera.spark.lineage.NavigatorQueryListener"</value>
|
||||
</property>
|
||||
</configuration>
|
@ -1,67 +0,0 @@
|
||||
<workflow-app name="Import Orcid Summaries" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>workingPath</name>
|
||||
<description>the working dir base path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>shell_cmd_0</name>
|
||||
<value>wget -O /tmp/ORCID_2019_summaries.tar.gz https://orcid.figshare.com/ndownloader/files/18017633 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_summaries.tar.gz /data/orcid_activities/ORCID_2019_summaries.tar.gz ; rm -f /tmp/ORCID_2019_summaries.tar.gz
|
||||
</value>
|
||||
<description>the shell command that downloads and puts to hdfs orcid summaries</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<start to="ResetWorkingPath"/>
|
||||
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="ResetWorkingPath">
|
||||
<fs>
|
||||
<delete path='${workingPath}/summaries/output'/>
|
||||
<mkdir path='${workingPath}/summaries/output'/>
|
||||
</fs>
|
||||
<ok to="check_exist_on_hdfs_summaries"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<decision name="check_exist_on_hdfs_summaries">
|
||||
<switch>
|
||||
<case to="ImportOrcidSummaries">
|
||||
${fs:exists(concat(workingPath,'/ORCID_2019_summaries.tar.gz'))}
|
||||
</case>
|
||||
<default to="DownloadSummaries" />
|
||||
</switch>
|
||||
</decision>
|
||||
|
||||
<action name="DownloadSummaries">
|
||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<exec>bash</exec>
|
||||
<argument>-c</argument>
|
||||
<argument>${shell_cmd_0}</argument>
|
||||
<capture-output/>
|
||||
</shell>
|
||||
<ok to="ImportOrcidSummaries"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="ImportOrcidSummaries">
|
||||
<java>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<main-class>eu.dnetlib.doiboost.orcid.OrcidDSManager</main-class>
|
||||
<arg>-w</arg><arg>${workingPath}/</arg>
|
||||
<arg>-n</arg><arg>${nameNode}</arg>
|
||||
<arg>-f</arg><arg>ORCID_2019_summaries.tar.gz</arg>
|
||||
<arg>-o</arg><arg>summaries/output/</arg>
|
||||
</java>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
@ -0,0 +1,101 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<work:work xmlns:address="http://www.orcid.org/ns/address"
|
||||
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
|
||||
xmlns:employment="http://www.orcid.org/ns/employment"
|
||||
xmlns:education="http://www.orcid.org/ns/education"
|
||||
xmlns:other-name="http://www.orcid.org/ns/other-name"
|
||||
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
|
||||
xmlns:funding="http://www.orcid.org/ns/funding"
|
||||
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
|
||||
xmlns:service="http://www.orcid.org/ns/service"
|
||||
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
|
||||
xmlns:distinction="http://www.orcid.org/ns/distinction"
|
||||
xmlns:internal="http://www.orcid.org/ns/internal"
|
||||
xmlns:membership="http://www.orcid.org/ns/membership"
|
||||
xmlns:person="http://www.orcid.org/ns/person"
|
||||
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
|
||||
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
|
||||
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
|
||||
xmlns:activities="http://www.orcid.org/ns/activities"
|
||||
xmlns:qualification="http://www.orcid.org/ns/qualification"
|
||||
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
|
||||
xmlns:error="http://www.orcid.org/ns/error"
|
||||
xmlns:preferences="http://www.orcid.org/ns/preferences"
|
||||
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
|
||||
xmlns:work="http://www.orcid.org/ns/work"
|
||||
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
|
||||
path="/0000-0003-2760-1191/work/28776099" visibility="public">
|
||||
<common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
|
||||
<common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
|
||||
<common:path>0000-0002-9157-3431</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
<common:source-name>Europe PubMed Central</common:source-name>
|
||||
</common:source>
|
||||
<work:title>
|
||||
<common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
|
||||
Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
|
||||
ST-Segment-Elevation Myocardial Infarction.</common:title>
|
||||
</work:title>
|
||||
<work:citation>
|
||||
<work:citation-type>formatted-unspecified</work:citation-type>
|
||||
<work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
|
||||
Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
|
||||
</work:citation>
|
||||
<work:type>journal-article</work:type>
|
||||
<common:publication-date>
|
||||
<common:year>2016</common:year>
|
||||
<common:month>11</common:month>
|
||||
</common:publication-date>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>pmid</common:external-id-type>
|
||||
<common:external-id-value>27899851</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">27899851</common:external-id-normalized>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
<common:external-id>
|
||||
<common:external-id-type>pmc</common:external-id-type>
|
||||
<common:external-id-value>PMC5126442</common:external-id-value>
|
||||
<common:external-id-normalized transient="true"
|
||||
>PMC5126442</common:external-id-normalized>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<common:url>http://europepmc.org/abstract/med/27899851</common:url>
|
||||
<work:contributors>
|
||||
<work:contributor>
|
||||
<work:contributor-attributes>
|
||||
<work:contributor-sequence>seq0</work:contributor-sequence>
|
||||
<work:contributor-role>role0</work:contributor-role>
|
||||
</work:contributor-attributes>
|
||||
</work:contributor>
|
||||
<work:contributor>
|
||||
<work:credit-name>creditname1</work:credit-name>
|
||||
</work:contributor>
|
||||
<work:contributor>
|
||||
<work:credit-name>creditname2</work:credit-name>
|
||||
<work:contributor-attributes>
|
||||
<work:contributor-sequence>seq2</work:contributor-sequence>
|
||||
<work:contributor-role></work:contributor-role>
|
||||
</work:contributor-attributes>
|
||||
</work:contributor>
|
||||
<work:contributor>
|
||||
<work:credit-name>creditname3</work:credit-name>
|
||||
<work:contributor-attributes>
|
||||
<work:contributor-sequence></work:contributor-sequence>
|
||||
<work:contributor-role>role3</work:contributor-role>
|
||||
</work:contributor-attributes>
|
||||
</work:contributor>
|
||||
<work:contributor>
|
||||
<work:credit-name></work:credit-name>
|
||||
<work:contributor-attributes>
|
||||
<work:contributor-sequence>seq4</work:contributor-sequence>
|
||||
<work:contributor-role>role4</work:contributor-role>
|
||||
</work:contributor-attributes>
|
||||
</work:contributor>
|
||||
</work:contributors>
|
||||
</work:work>
|
Loading…
Reference in New Issue