1
0
Fork 0

first version of dataset successful generated from orcid dump 2020

This commit is contained in:
Enrico Ottonello 2020-11-06 13:47:50 +01:00
parent 9818e74a70
commit 6bc7dbeca7
20 changed files with 320 additions and 228 deletions

View File

@ -51,7 +51,6 @@
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>${org.apache.httpcomponents.version}</version>
</dependency>
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
@ -87,7 +86,6 @@
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-text</artifactId>
<version>${common.text.version}</version>
</dependency>

View File

@ -62,7 +62,7 @@ public class OrcidDSManager {
.toString(
OrcidDSManager.class
.getResourceAsStream(
"/eu/dnetlib/dhp/doiboost/create_orcid_authors_data.json")));
"/eu/dnetlib/dhp/doiboost/gen_orcid_authors_from_summaries.json")));
parser.parseArgument(args);
hdfsServerUri = parser.get("hdfsServerUri");

View File

@ -73,7 +73,7 @@ public class ActivitiesDumpReader {
SequenceFile.Writer.valueClass(Text.class))) {
while ((entry = tais.getNextTarEntry()) != null) {
String filename = entry.getName();
StringBuffer buffer = new StringBuffer();
try {
if (entry.isDirectory() || !filename.contains("works")) {
@ -83,7 +83,7 @@ public class ActivitiesDumpReader {
BufferedReader br = new BufferedReader(new InputStreamReader(tais)); // Read directly from
// tarInput
String line;
StringBuffer buffer = new StringBuffer();
buffer = new StringBuffer();
while ((line = br.readLine()) != null) {
buffer.append(line);
}

View File

@ -42,7 +42,7 @@ public class GenOrcidAuthorWork extends OrcidDSManager {
.toString(
GenOrcidAuthorWork.class
.getResourceAsStream(
"/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works_parameters.json")));
"/eu/dnetlib/dhp/doiboost/gen_orcid_works-no-doi_from_activities.json")));
parser.parseArgument(args);
hdfsServerUri = parser.get("hdfsServerUri");

View File

@ -67,7 +67,7 @@ public class SparkGenEnrichedOrcidWorks {
JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaPairRDD<Text, Text> summariesRDD = sc
.sequenceFile(workingPath + "summaries/output/authors.seq", Text.class, Text.class);
.sequenceFile(workingPath + "authors/authors.seq", Text.class, Text.class);
Dataset<AuthorData> summariesDataset = spark
.createDataset(
summariesRDD.map(seq -> loadAuthorFromJson(seq._1(), seq._2())).rdd(),
@ -96,8 +96,8 @@ public class SparkGenEnrichedOrcidWorks {
Encoders.tuple(Encoders.STRING(), Encoders.STRING()))
.filter(Objects::nonNull)
.toJavaRDD();
enrichedWorksRDD.saveAsTextFile(workingPath + outputEnrichedWorksPath);
logger.info("Works enriched data saved");
// enrichedWorksRDD.saveAsTextFile(workingPath + outputEnrichedWorksPath);
logger.info("Enriched works RDD ready.");
final LongAccumulator parsedPublications = spark.sparkContext().longAccumulator("parsedPublications");
final LongAccumulator enrichedPublications = spark
@ -132,7 +132,7 @@ public class SparkGenEnrichedOrcidWorks {
.write()
.format("parquet")
.mode(SaveMode.Overwrite)
.save(workingPath + "no_doi_dataset/output");
.save(workingPath + outputEnrichedWorksPath);
logger.info("parsedPublications: " + parsedPublications.value().toString());
logger.info("enrichedPublications: " + enrichedPublications.value().toString());

View File

@ -5,6 +5,7 @@ import java.io.IOException;
import java.text.Normalizer;
import java.util.*;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.similarity.JaroWinklerSimilarity;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -40,7 +41,7 @@ public class AuthorMatcher {
int matchCounter = 0;
List<Integer> matchCounters = Arrays.asList(matchCounter);
Contributor contributor = null;
contributors.forEach(c -> {
contributors.stream().filter(c -> !StringUtils.isBlank(c.getCreditName())).forEach(c -> {
if (simpleMatch(c.getCreditName(), author.getName()) ||
simpleMatch(c.getCreditName(), author.getSurname()) ||
simpleMatch(c.getCreditName(), author.getOtherName())) {
@ -54,6 +55,7 @@ public class AuthorMatcher {
Optional<Contributor> optCon = contributors
.stream()
.filter(c -> c.isSimpleMatch())
.filter(c -> !StringUtils.isBlank(c.getCreditName()))
.map(c -> {
c.setScore(bestMatch(author.getName(), author.getSurname(), c.getCreditName()));
return c;

View File

@ -183,39 +183,34 @@ public class XMLRecordParserNoDoi {
private static List<Contributor> getContributors(VTDGen vg, VTDNav vn, AutoPilot ap)
throws XPathParseException, NavException, XPathEvalException {
List<Contributor> contributors = new ArrayList<Contributor>();
int nameIndex = 0;
ap.selectXPath("//work:contributor/work:credit-name");
ap.selectXPath("//work:contributors/work:contributor");
while (ap.evalXPath() != -1) {
Contributor contributor = new Contributor();
int t = vn.getText();
if (t >= 0) {
contributor.setCreditName(vn.toNormalizedString(t));
contributors.add(nameIndex, contributor);
nameIndex++;
if (vn.toElement(VTDNav.FIRST_CHILD, "work:credit-name")) {
int val = vn.getText();
if (val != -1) {
contributor.setCreditName(vn.toNormalizedString(val));
}
vn.toElement(VTDNav.PARENT);
}
if (contributors.size() == 0) {
return contributors;
if (vn.toElement(VTDNav.FIRST_CHILD, "work:contributor-attributes")) {
if (vn.toElement(VTDNav.FIRST_CHILD, "work:contributor-sequence")) {
int val = vn.getText();
if (val != -1) {
contributor.setSequence(vn.toNormalizedString(val));
}
int sequenceIndex = 0;
ap.selectXPath("//work:contributor/work:contributor-attributes/work:contributor-sequence");
while (ap.evalXPath() != -1) {
int t = vn.getText();
if (t >= 0) {
contributors.get(sequenceIndex).setSequence(vn.toNormalizedString(t));
sequenceIndex++;
vn.toElement(VTDNav.PARENT);
}
if (vn.toElement(VTDNav.FIRST_CHILD, "work:contributor-role")) {
int val = vn.getText();
if (val != -1) {
contributor.setRole(vn.toNormalizedString(val));
}
int roleIndex = 0;
ap.selectXPath("//work:contributor/work:contributor-attributes/work:contributor-role");
while (ap.evalXPath() != -1) {
int t = vn.getText();
if (t >= 0) {
contributors.get(roleIndex).setRole(vn.toNormalizedString(t));
roleIndex++;
vn.toElement(VTDNav.PARENT);
}
vn.toElement(VTDNav.PARENT);
}
contributors.add(contributor);
}
return contributors;
}

View File

@ -0,0 +1,7 @@
[
{"paramName":"n", "paramLongName":"hdfsServerUri", "paramDescription": "the server uri", "paramRequired": true},
{"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the default work path", "paramRequired": true},
{"paramName":"f", "paramLongName":"activitiesFileNameTarGz", "paramDescription": "the name of the activities orcid file", "paramRequired": true},
{"paramName":"ow", "paramLongName":"outputWorksPath", "paramDescription": "the relative folder of the sequencial file to write", "paramRequired": true},
{"paramName":"oew", "paramLongName":"outputEnrichedWorksPath", "paramDescription": "the relative folder of the sequencial file to write the data", "paramRequired": true}
]

View File

@ -1,42 +0,0 @@
<configuration>
<property>
<name>jobTracker</name>
<value>hadoop-rm3.garr-pa1.d4science.org:8032</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://hadoop-rm1.garr-pa1.d4science.org:8020</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
<property>
<name>hive_metastore_uris</name>
<value>thrift://hadoop-edge2.garr-pa1.d4science.org:9083</value>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<value>http://hadoop-edge1.garr-pa1.d4science.org:18089/</value>
</property>
<property>
<name>spark2EventLogDir</name>
<value>/user/spark/spark2ApplicationHistory</value>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>"com.cloudera.spark.lineage.NavigatorAppListener"</value>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>"com.cloudera.spark.lineage.NavigatorQueryListener"</value>
</property>
</configuration>

View File

@ -1,67 +0,0 @@
<workflow-app name="Import Orcid Summaries" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>workingPath</name>
<description>the working dir base path</description>
</property>
<property>
<name>shell_cmd_0</name>
<value>wget -O /tmp/ORCID_2019_summaries.tar.gz https://orcid.figshare.com/ndownloader/files/18017633 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_summaries.tar.gz /data/orcid_activities/ORCID_2019_summaries.tar.gz ; rm -f /tmp/ORCID_2019_summaries.tar.gz
</value>
<description>the shell command that downloads and puts to hdfs orcid summaries</description>
</property>
</parameters>
<start to="ResetWorkingPath"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="ResetWorkingPath">
<fs>
<delete path='${workingPath}/summaries/output'/>
<mkdir path='${workingPath}/summaries/output'/>
</fs>
<ok to="check_exist_on_hdfs_summaries"/>
<error to="Kill"/>
</action>
<decision name="check_exist_on_hdfs_summaries">
<switch>
<case to="ImportOrcidSummaries">
${fs:exists(concat(workingPath,'/ORCID_2019_summaries.tar.gz'))}
</case>
<default to="DownloadSummaries" />
</switch>
</decision>
<action name="DownloadSummaries">
<shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<exec>bash</exec>
<argument>-c</argument>
<argument>${shell_cmd_0}</argument>
<capture-output/>
</shell>
<ok to="ImportOrcidSummaries"/>
<error to="Kill"/>
</action>
<action name="ImportOrcidSummaries">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<main-class>eu.dnetlib.doiboost.orcid.OrcidDSManager</main-class>
<arg>-w</arg><arg>${workingPath}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_summaries.tar.gz</arg>
<arg>-o</arg><arg>summaries/output/</arg>
</java>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -9,7 +9,7 @@
</property>
<property>
<name>oozie.launcher.mapreduce.map.java.opts</name>
<value>-Xmx4g</value>
<value>-Xmx2g</value>
</property>
<property>
<name>jobTracker</name>

View File

@ -1,4 +1,4 @@
<workflow-app name="Import Orcid Activities" xmlns="uri:oozie:workflow:0.5">
<workflow-app name="Gen Orcid Works-no-doi From Activities" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>workingPath</name>
@ -6,67 +6,67 @@
</property>
<property>
<name>shell_cmd_0</name>
<value>wget -O /tmp/ORCID_2019_activites_0.tar.gz https://orcid.figshare.com/ndownloader/files/18017660 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_0.tar.gz /data/orcid_activities/ORCID_2019_activites_0.tar.gz ; rm -f /tmp/ORCID_2019_activites_0.tar.gz
<value>wget -O /tmp/ORCID_2020_10_activites_0.tar.gz https://orcid.figshare.com/ndownloader/files/25002232 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_0.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_0.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_0.tar.gz
</value>
<description>the shell command that downloads and puts to hdfs orcid activity file 0</description>
</property>
<property>
<name>shell_cmd_1</name>
<value>wget -O /tmp/ORCID_2019_activites_1.tar.gz https://orcid.figshare.com/ndownloader/files/18017675 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_1.tar.gz /data/orcid_activities/ORCID_2019_activites_1.tar.gz ; rm -f /tmp/ORCID_2019_activites_1.tar.gz
<value>wget -O /tmp/ORCID_2020_10_activites_1.tar.gz https://orcid.figshare.com/ndownloader/files/25002088 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_1.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_1.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_1.tar.gz
</value>
<description>the shell command that downloads and puts to hdfs orcid activity file 1</description>
</property>
<property>
<name>shell_cmd_2</name>
<value>wget -O /tmp/ORCID_2019_activites_2.tar.gz https://orcid.figshare.com/ndownloader/files/18017717 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_2.tar.gz /data/orcid_activities/ORCID_2019_activites_2.tar.gz ; rm -f /tmp/ORCID_2019_activites_2.tar.gz
<value>wget -O /tmp/ORCID_2020_10_activites_2.tar.gz https://orcid.figshare.com/ndownloader/files/25000596 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_2.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_2.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_2.tar.gz
</value>
<description>the shell command that downloads and puts to hdfs orcid activity file 2</description>
</property>
<property>
<name>shell_cmd_3</name>
<value>wget -O /tmp/ORCID_2019_activites_3.tar.gz https://orcid.figshare.com/ndownloader/files/18017765 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_3.tar.gz /data/orcid_activities/ORCID_2019_activites_3.tar.gz ; rm -f /tmp/ORCID_2019_activites_3.tar.gz
<value>wget -O /tmp/ORCID_2020_10_activites_3.tar.gz https://orcid.figshare.com/ndownloader/files/25015150 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_3.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_3.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_3.tar.gz
</value>
<description>the shell command that downloads and puts to hdfs orcid activity file 3</description>
</property>
<property>
<name>shell_cmd_4</name>
<value>wget -O /tmp/ORCID_2019_activites_4.tar.gz https://orcid.figshare.com/ndownloader/files/18017831 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_4.tar.gz /data/orcid_activities/ORCID_2019_activites_4.tar.gz ; rm -f /tmp/ORCID_2019_activites_4.tar.gz
<value>wget -O /tmp/ORCID_2020_10_activites_4.tar.gz https://orcid.figshare.com/ndownloader/files/25033643 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_4.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_4.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_4.tar.gz
</value>
<description>the shell command that downloads and puts to hdfs orcid activity file 4</description>
</property>
<property>
<name>shell_cmd_5</name>
<value>wget -O /tmp/ORCID_2019_activites_5.tar.gz https://orcid.figshare.com/ndownloader/files/18017987 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_5.tar.gz /data/orcid_activities/ORCID_2019_activites_5.tar.gz ; rm -f /tmp/ORCID_2019_activites_5.tar.gz
<value>wget -O /tmp/ORCID_2020_10_activites_5.tar.gz https://orcid.figshare.com/ndownloader/files/25005483 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_5.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_5.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_5.tar.gz
</value>
<description>the shell command that downloads and puts to hdfs orcid activity file 5</description>
</property>
<property>
<name>shell_cmd_6</name>
<value>wget -O /tmp/ORCID_2019_activites_6.tar.gz https://orcid.figshare.com/ndownloader/files/18018053 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_6.tar.gz /data/orcid_activities/ORCID_2019_activites_6.tar.gz ; rm -f /tmp/ORCID_2019_activites_6.tar.gz
<value>wget -O /tmp/ORCID_2020_10_activites_6.tar.gz https://orcid.figshare.com/ndownloader/files/25005425 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_6.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_6.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_6.tar.gz
</value>
<description>the shell command that downloads and puts to hdfs orcid activity file 6</description>
</property>
<property>
<name>shell_cmd_7</name>
<value>wget -O /tmp/ORCID_2019_activites_7.tar.gz https://orcid.figshare.com/ndownloader/files/18018023 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_7.tar.gz /data/orcid_activities/ORCID_2019_activites_7.tar.gz ; rm -f /tmp/ORCID_2019_activites_7.tar.gz
<value>wget -O /tmp/ORCID_2020_10_activites_7.tar.gz https://orcid.figshare.com/ndownloader/files/25012016 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_7.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_7.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_7.tar.gz
</value>
<description>the shell command that downloads and puts to hdfs orcid activity file 7</description>
</property>
<property>
<name>shell_cmd_8</name>
<value>wget -O /tmp/ORCID_2019_activites_8.tar.gz https://orcid.figshare.com/ndownloader/files/18018248 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_8.tar.gz /data/orcid_activities/ORCID_2019_activites_8.tar.gz ; rm -f /tmp/ORCID_2019_activites_8.tar.gz
<value>wget -O /tmp/ORCID_2020_10_activites_8.tar.gz https://orcid.figshare.com/ndownloader/files/25012079 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_8.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_8.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_8.tar.gz
</value>
<description>the shell command that downloads and puts to hdfs orcid activity file 8</description>
</property>
<property>
<name>shell_cmd_9</name>
<value>wget -O /tmp/ORCID_2019_activites_9.tar.gz https://orcid.figshare.com/ndownloader/files/18018029 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_9.tar.gz /data/orcid_activities/ORCID_2019_activites_9.tar.gz ; rm -f /tmp/ORCID_2019_activites_9.tar.gz
<value>wget -O /tmp/ORCID_2020_10_activites_9.tar.gz https://orcid.figshare.com/ndownloader/files/25010727 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_9.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_9.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_9.tar.gz
</value>
<description>the shell command that downloads and puts to hdfs orcid activity file 9</description>
</property>
<property>
<name>shell_cmd_X</name>
<value>wget -O /tmp/ORCID_2019_activites_X.tar.gz https://orcid.figshare.com/ndownloader/files/18018182 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_X.tar.gz /data/orcid_activities/ORCID_2019_activites_X.tar.gz ; rm -f /tmp/ORCID_2019_activites_X.tar.gz
<value>wget -O /tmp/ORCID_2020_10_activites_X.tar.gz https://orcid.figshare.com/ndownloader/files/25011025 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_X.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_X.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_X.tar.gz
</value>
<description>the shell command that downloads and puts to hdfs orcid activity file X</description>
</property>
@ -82,11 +82,11 @@
<fs>
<delete path='${workingPath}/no_doi_works/*'/>
</fs>
<ok to="fork_gen_orcid_author_work"/>
<ok to="fork_check_download_files"/>
<error to="Kill"/>
</action>
<fork name = "fork_gen_orcid_author_work">
<fork name = "fork_check_download_files">
<path start = "check_exist_on_hdfs_activities_0"/>
<path start = "check_exist_on_hdfs_activities_1"/>
<path start = "check_exist_on_hdfs_activities_2"/>
@ -102,8 +102,8 @@
<decision name="check_exist_on_hdfs_activities_0">
<switch>
<case to="GenOrcidAuthorWork_0">
${fs:exists(concat(workingPath,'/ORCID_2019_activites_0.tar.gz'))}
<case to="wait_download_phase_node">
${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_0.tar.gz'))}
</case>
<default to="Download_0" />
</switch>
@ -118,7 +118,7 @@
<argument>${shell_cmd_0}</argument>
<capture-output/>
</shell>
<ok to="GenOrcidAuthorWork_0"/>
<ok to="wait_download_phase_node"/>
<error to="Kill"/>
</action>
@ -129,7 +129,7 @@
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
<arg>-w</arg><arg>${workingPath}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_activites_0.tar.gz</arg>
<arg>-f</arg><arg>ORCID_2020_10_activites_0.tar.gz</arg>
<arg>-ow</arg><arg>no_doi_works/works_0.seq</arg>
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
</java>
@ -139,8 +139,8 @@
<decision name="check_exist_on_hdfs_activities_1">
<switch>
<case to="GenOrcidAuthorWork_1">
${fs:exists(concat(workingPath,'/ORCID_2019_activites_1.tar.gz'))}
<case to="wait_download_phase_node">
${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_1.tar.gz'))}
</case>
<default to="Download_1" />
</switch>
@ -155,7 +155,7 @@
<argument>${shell_cmd_1}</argument>
<capture-output/>
</shell>
<ok to="GenOrcidAuthorWork_1"/>
<ok to="wait_download_phase_node"/>
<error to="Kill"/>
</action>
@ -166,7 +166,7 @@
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
<arg>-w</arg><arg>${workingPath}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_activites_1.tar.gz</arg>
<arg>-f</arg><arg>ORCID_2020_10_activites_1.tar.gz</arg>
<arg>-ow</arg><arg>no_doi_works/works_1.seq</arg>
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
</java>
@ -176,8 +176,8 @@
<decision name="check_exist_on_hdfs_activities_2">
<switch>
<case to="GenOrcidAuthorWork_2">
${fs:exists(concat(workingPath,'/ORCID_2019_activites_2.tar.gz'))}
<case to="wait_download_phase_node">
${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_2.tar.gz'))}
</case>
<default to="Download_2" />
</switch>
@ -192,7 +192,7 @@
<argument>${shell_cmd_2}</argument>
<capture-output/>
</shell>
<ok to="GenOrcidAuthorWork_2"/>
<ok to="wait_download_phase_node"/>
<error to="Kill"/>
</action>
@ -203,7 +203,7 @@
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
<arg>-w</arg><arg>${workingPath}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_activites_2.tar.gz</arg>
<arg>-f</arg><arg>ORCID_2020_10_activites_2.tar.gz</arg>
<arg>-ow</arg><arg>no_doi_works/works_2.seq</arg>
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
</java>
@ -213,8 +213,8 @@
<decision name="check_exist_on_hdfs_activities_3">
<switch>
<case to="GenOrcidAuthorWork_3">
${fs:exists(concat(workingPath,'/ORCID_2019_activites_3.tar.gz'))}
<case to="wait_download_phase_node">
${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_3.tar.gz'))}
</case>
<default to="Download_3" />
</switch>
@ -229,7 +229,7 @@
<argument>${shell_cmd_3}</argument>
<capture-output/>
</shell>
<ok to="GenOrcidAuthorWork_3"/>
<ok to="wait_download_phase_node"/>
<error to="Kill"/>
</action>
@ -240,7 +240,7 @@
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
<arg>-w</arg><arg>${workingPath}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_activites_3.tar.gz</arg>
<arg>-f</arg><arg>ORCID_2020_10_activites_3.tar.gz</arg>
<arg>-ow</arg><arg>no_doi_works/works_3.seq</arg>
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
</java>
@ -250,8 +250,8 @@
<decision name="check_exist_on_hdfs_activities_4">
<switch>
<case to="GenOrcidAuthorWork_4">
${fs:exists(concat(workingPath,'/ORCID_2019_activites_4.tar.gz'))}
<case to="wait_download_phase_node">
${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_4.tar.gz'))}
</case>
<default to="Download_4" />
</switch>
@ -266,7 +266,7 @@
<argument>${shell_cmd_4}</argument>
<capture-output/>
</shell>
<ok to="GenOrcidAuthorWork_4"/>
<ok to="wait_download_phase_node"/>
<error to="Kill"/>
</action>
@ -277,7 +277,7 @@
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
<arg>-w</arg><arg>${workingPath}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_activites_4.tar.gz</arg>
<arg>-f</arg><arg>ORCID_2020_10_activites_4.tar.gz</arg>
<arg>-ow</arg><arg>no_doi_works/works_4.seq</arg>
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
</java>
@ -287,8 +287,8 @@
<decision name="check_exist_on_hdfs_activities_5">
<switch>
<case to="GenOrcidAuthorWork_5">
${fs:exists(concat(workingPath,'/ORCID_2019_activites_5.tar.gz'))}
<case to="wait_download_phase_node">
${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_5.tar.gz'))}
</case>
<default to="Download_5" />
</switch>
@ -303,7 +303,7 @@
<argument>${shell_cmd_5}</argument>
<capture-output/>
</shell>
<ok to="GenOrcidAuthorWork_5"/>
<ok to="wait_download_phase_node"/>
<error to="Kill"/>
</action>
@ -314,7 +314,7 @@
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
<arg>-w</arg><arg>${workingPath}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_activites_5.tar.gz</arg>
<arg>-f</arg><arg>ORCID_2020_10_activites_5.tar.gz</arg>
<arg>-ow</arg><arg>no_doi_works/works_5.seq</arg>
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
</java>
@ -324,8 +324,8 @@
<decision name="check_exist_on_hdfs_activities_6">
<switch>
<case to="GenOrcidAuthorWork_6">
${fs:exists(concat(workingPath,'/ORCID_2019_activites_6.tar.gz'))}
<case to="wait_download_phase_node">
${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_6.tar.gz'))}
</case>
<default to="Download_6" />
</switch>
@ -340,7 +340,7 @@
<argument>${shell_cmd_6}</argument>
<capture-output/>
</shell>
<ok to="GenOrcidAuthorWork_6"/>
<ok to="wait_download_phase_node"/>
<error to="Kill"/>
</action>
@ -351,7 +351,7 @@
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
<arg>-w</arg><arg>${workingPath}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_activites_6.tar.gz</arg>
<arg>-f</arg><arg>ORCID_2020_10_activites_6.tar.gz</arg>
<arg>-ow</arg><arg>no_doi_works/works_6.seq</arg>
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
</java>
@ -362,8 +362,8 @@
<decision name="check_exist_on_hdfs_activities_7">
<switch>
<case to="GenOrcidAuthorWork_7">
${fs:exists(concat(workingPath,'/ORCID_2019_activites_7.tar.gz'))}
<case to="wait_download_phase_node">
${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_7.tar.gz'))}
</case>
<default to="Download_7" />
</switch>
@ -378,7 +378,7 @@
<argument>${shell_cmd_7}</argument>
<capture-output/>
</shell>
<ok to="GenOrcidAuthorWork_7"/>
<ok to="wait_download_phase_node"/>
<error to="Kill"/>
</action>
@ -389,7 +389,7 @@
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
<arg>-w</arg><arg>${workingPath}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_activites_7.tar.gz</arg>
<arg>-f</arg><arg>ORCID_2020_10_activites_7.tar.gz</arg>
<arg>-ow</arg><arg>no_doi_works/works_7.seq</arg>
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
</java>
@ -399,8 +399,8 @@
<decision name="check_exist_on_hdfs_activities_8">
<switch>
<case to="GenOrcidAuthorWork_8">
${fs:exists(concat(workingPath,'/ORCID_2019_activites_8.tar.gz'))}
<case to="wait_download_phase_node">
${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_8.tar.gz'))}
</case>
<default to="Download_8" />
</switch>
@ -415,7 +415,7 @@
<argument>${shell_cmd_8}</argument>
<capture-output/>
</shell>
<ok to="GenOrcidAuthorWork_8"/>
<ok to="wait_download_phase_node"/>
<error to="Kill"/>
</action>
@ -426,7 +426,7 @@
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
<arg>-w</arg><arg>${workingPath}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_activites_8.tar.gz</arg>
<arg>-f</arg><arg>ORCID_2020_10_activites_8.tar.gz</arg>
<arg>-ow</arg><arg>no_doi_works/works_8.seq</arg>
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
</java>
@ -436,8 +436,8 @@
<decision name="check_exist_on_hdfs_activities_9">
<switch>
<case to="GenOrcidAuthorWork_9">
${fs:exists(concat(workingPath,'/ORCID_2019_activites_9.tar.gz'))}
<case to="wait_download_phase_node">
${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_9.tar.gz'))}
</case>
<default to="Download_9" />
</switch>
@ -452,7 +452,7 @@
<argument>${shell_cmd_9}</argument>
<capture-output/>
</shell>
<ok to="GenOrcidAuthorWork_9"/>
<ok to="wait_download_phase_node"/>
<error to="Kill"/>
</action>
@ -463,7 +463,7 @@
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
<arg>-w</arg><arg>${workingPath}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_activites_9.tar.gz</arg>
<arg>-f</arg><arg>ORCID_2020_10_activites_9.tar.gz</arg>
<arg>-ow</arg><arg>no_doi_works/works_9.seq</arg>
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
</java>
@ -473,8 +473,8 @@
<decision name="check_exist_on_hdfs_activities_X">
<switch>
<case to="GenOrcidAuthorWork_X">
${fs:exists(concat(workingPath,'/ORCID_2019_activites_X.tar.gz'))}
<case to="wait_download_phase_node">
${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_X.tar.gz'))}
</case>
<default to="Download_X" />
</switch>
@ -489,7 +489,7 @@
<argument>${shell_cmd_X}</argument>
<capture-output/>
</shell>
<ok to="GenOrcidAuthorWork_X"/>
<ok to="wait_download_phase_node"/>
<error to="Kill"/>
</action>
@ -500,7 +500,7 @@
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
<arg>-w</arg><arg>${workingPath}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_activites_X.tar.gz</arg>
<arg>-f</arg><arg>ORCID_2020_10_activites_X.tar.gz</arg>
<arg>-ow</arg><arg>no_doi_works/works_X.seq</arg>
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
</java>
@ -508,7 +508,35 @@
<error to="Kill"/>
</action>
<join name = "wait_download_phase_node" to = "fork_gen_orcid_author_work"/>
<fork name = "fork_gen_orcid_author_work">
<path start = "GenOrcidAuthorWork_0"/>
<path start = "GenOrcidAuthorWork_1"/>
<path start = "GenOrcidAuthorWork_2"/>
<path start = "GenOrcidAuthorWork_3"/>
<path start = "GenOrcidAuthorWork_4"/>
<path start = "GenOrcidAuthorWork_5"/>
<path start = "GenOrcidAuthorWork_6"/>
<path start = "GenOrcidAuthorWork_7"/>
<path start = "GenOrcidAuthorWork_8"/>
<path start = "GenOrcidAuthorWork_9"/>
<path start = "GenOrcidAuthorWork_X"/>
</fork>
<join name = "join_node" to = "End"/>
<!-- <join name = "join_node" to = "fork_gen_orcid_author_work_2"/>-->
<!-- <fork name = "fork_gen_orcid_author_work_2">-->
<!-- <path start = "GenOrcidAuthorWork_6"/>-->
<!-- <path start = "GenOrcidAuthorWork_7"/>-->
<!-- <path start = "GenOrcidAuthorWork_8"/>-->
<!-- <path start = "GenOrcidAuthorWork_9"/>-->
<!-- <path start = "GenOrcidAuthorWork_X"/>-->
<!-- </fork>-->
<!-- <join name = "join_node_2" to = "End"/>-->
<end name="End"/>
</workflow-app>

View File

@ -19,4 +19,8 @@
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
<property>
<name>oozie.launcher.mapreduce.map.java.opts</name>
<value>-Xmx16g</value>
</property>
</configuration>

View File

@ -1,4 +1,4 @@
<workflow-app name="Import Orcid Summaries" xmlns="uri:oozie:workflow:0.5">
<workflow-app name="Gen Orcid Authors From Summaries" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>workingPath</name>
@ -6,7 +6,7 @@
</property>
<property>
<name>shell_cmd_0</name>
<value>wget -O /tmp/ORCID_2019_summaries.tar.gz https://orcid.figshare.com/ndownloader/files/18017633 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_summaries.tar.gz /data/orcid_activities/ORCID_2019_summaries.tar.gz ; rm -f /tmp/ORCID_2019_summaries.tar.gz
<value>wget -O /tmp/ORCID_2020_10_summaries.tar.gz https://orcid.figshare.com/ndownloader/files/25032905 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_summaries.tar.gz /data/orcid_activities_2020/ORCID_2020_10_summaries.tar.gz ; rm -f /tmp/ORCID_2020_10_summaries.tar.gz
</value>
<description>the shell command that downloads and puts to hdfs orcid summaries</description>
</property>
@ -21,8 +21,8 @@
<action name="ResetWorkingPath">
<fs>
<delete path='${workingPath}/summaries/output'/>
<mkdir path='${workingPath}/summaries/output'/>
<delete path='${workingPath}/authors'/>
<mkdir path='${workingPath}/authors'/>
</fs>
<ok to="check_exist_on_hdfs_summaries"/>
<error to="Kill"/>
@ -31,7 +31,7 @@
<decision name="check_exist_on_hdfs_summaries">
<switch>
<case to="ImportOrcidSummaries">
${fs:exists(concat(workingPath,'/ORCID_2019_summaries.tar.gz'))}
${fs:exists(concat(workingPath,'/ORCID_2020_10_summaries.tar.gz'))}
</case>
<default to="DownloadSummaries" />
</switch>
@ -57,8 +57,8 @@
<main-class>eu.dnetlib.doiboost.orcid.OrcidDSManager</main-class>
<arg>-w</arg><arg>${workingPath}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_summaries.tar.gz</arg>
<arg>-o</arg><arg>summaries/output/</arg>
<arg>-f</arg><arg>ORCID_2020_10_summaries.tar.gz</arg>
<arg>-o</arg><arg>authors/</arg>
</java>
<ok to="End"/>
<error to="Kill"/>

View File

@ -59,7 +59,7 @@
<action name="ResetWorkingPath">
<fs>
<delete path='${workingPath}/no_doi_enriched_works/output'/>
<delete path='${workingPath}/no_doi_dataset'/>
</fs>
<ok to="GenOrcidNoDoiDataset"/>
<error to="Kill"/>
@ -85,7 +85,7 @@
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>-</arg>
<arg>-ow</arg><arg>no_doi_works/</arg>
<arg>-oew</arg><arg>no_doi_enriched_works/output</arg>
<arg>-oew</arg><arg>no_doi_dataset</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>

View File

@ -38,8 +38,8 @@ public class OrcidClientTest {
@Test
public void downloadTest() throws Exception {
String record = testDownloadRecord("0000-0002-2536-4498");
File f = new File("/tmp/downloaded_0000-0002-2536-4498.xml");
String record = testDownloadRecord("0000-0001-6163-2042");
File f = new File("/tmp/downloaded_0000-0001-6163-2042.xml");
OutputStream outStream = new FileOutputStream(f);
IOUtils.write(record.getBytes(), outStream);
System.out.println("saved to tmp");

View File

@ -2,15 +2,20 @@
package eu.dnetlib.doiboost.orcidnodoi.xml;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.IOException;
import java.text.Normalizer;
import java.util.*;
import javax.validation.constraints.AssertTrue;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.similarity.JaccardSimilarity;
import org.apache.commons.text.similarity.JaroWinklerSimilarity;
import org.junit.jupiter.api.Test;
import org.mortbay.log.Log;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -41,7 +46,6 @@ public class OrcidNoDoiTest {
String orcidIdA = "0000-0003-2760-1191";
@Test
// @Ignore
public void readPublicationFieldsTest()
throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException {
logger.info("running loadPublicationFieldsTest ....");
@ -95,8 +99,7 @@ public class OrcidNoDoiTest {
}
@Test
// @Ignore
private void authorMatchTest() throws Exception {
public void authorMatchTest() throws Exception {
logger.info("running authorSimpleMatchTest ....");
String orcidWork = "activity_work_0000-0003-2760-1191-similarity.xml";
AuthorData author = new AuthorData();
@ -121,9 +124,60 @@ public class OrcidNoDoiTest {
logger.error("parsing xml", e);
}
assertNotNull(workData);
Contributor a = workData.getContributors().get(0);
assertTrue(a.getCreditName().equals("Abdel-Dayem K"));
AuthorMatcher.match(author, workData.getContributors());
GsonBuilder builder = new GsonBuilder();
Gson gson = builder.create();
logger.info(gson.toJson(workData));
assertTrue(workData.getContributors().size() == 6);
Contributor c = workData.getContributors().get(0);
assertTrue(c.getOid().equals("0000-0003-2760-1191"));
assertTrue(c.getName().equals("Khairy"));
assertTrue(c.getSurname().equals("Abdel Dayem"));
assertTrue(c.getCreditName().equals("Abdel-Dayem K"));
}
@Test
public void readContributorsTest()
throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException {
logger.info("running loadPublicationFieldsTest ....");
String xml = IOUtils
.toString(
OrcidNoDoiTest.class.getResourceAsStream("activity_work_0000-0003-2760-1191_contributors.xml"));
if (xml == null) {
logger.info("Resource not found");
}
XMLRecordParserNoDoi p = new XMLRecordParserNoDoi();
if (p == null) {
logger.info("XMLRecordParserNoDoi null");
}
WorkDataNoDoi workData = null;
try {
workData = p.VTDParseWorkData(xml.getBytes());
} catch (Exception e) {
logger.error("parsing xml", e);
}
assertNotNull(workData.getContributors());
assertTrue(workData.getContributors().size() == 5);
assertTrue(StringUtils.isBlank(workData.getContributors().get(0).getCreditName()));
assertTrue(workData.getContributors().get(0).getSequence().equals("seq0"));
assertTrue(workData.getContributors().get(0).getRole().equals("role0"));
assertTrue(workData.getContributors().get(1).getCreditName().equals("creditname1"));
assertTrue(StringUtils.isBlank(workData.getContributors().get(1).getSequence()));
assertTrue(StringUtils.isBlank(workData.getContributors().get(1).getRole()));
assertTrue(workData.getContributors().get(2).getCreditName().equals("creditname2"));
assertTrue(workData.getContributors().get(2).getSequence().equals("seq2"));
assertTrue(StringUtils.isBlank(workData.getContributors().get(2).getRole()));
assertTrue(workData.getContributors().get(3).getCreditName().equals("creditname3"));
assertTrue(StringUtils.isBlank(workData.getContributors().get(3).getSequence()));
assertTrue(workData.getContributors().get(3).getRole().equals("role3"));
assertTrue(StringUtils.isBlank(workData.getContributors().get(4).getCreditName()));
assertTrue(workData.getContributors().get(4).getSequence().equals("seq4"));
assertTrue(workData.getContributors().get(4).getRole().equals("role4"));
}
}

View File

@ -0,0 +1,101 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<work:work xmlns:address="http://www.orcid.org/ns/address"
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work"
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
path="/0000-0003-2760-1191/work/28776099" visibility="public">
<common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
<common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
<common:path>0000-0002-9157-3431</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Europe PubMed Central</common:source-name>
</common:source>
<work:title>
<common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
ST-Segment-Elevation Myocardial Infarction.</common:title>
</work:title>
<work:citation>
<work:citation-type>formatted-unspecified</work:citation-type>
<work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
</work:citation>
<work:type>journal-article</work:type>
<common:publication-date>
<common:year>2016</common:year>
<common:month>11</common:month>
</common:publication-date>
<common:external-ids>
<common:external-id>
<common:external-id-type>pmid</common:external-id-type>
<common:external-id-value>27899851</common:external-id-value>
<common:external-id-normalized transient="true">27899851</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
<common:external-id>
<common:external-id-type>pmc</common:external-id-type>
<common:external-id-value>PMC5126442</common:external-id-value>
<common:external-id-normalized transient="true"
>PMC5126442</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<common:url>http://europepmc.org/abstract/med/27899851</common:url>
<work:contributors>
<work:contributor>
<work:contributor-attributes>
<work:contributor-sequence>seq0</work:contributor-sequence>
<work:contributor-role>role0</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>creditname1</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>creditname2</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>seq2</work:contributor-sequence>
<work:contributor-role></work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>creditname3</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence></work:contributor-sequence>
<work:contributor-role>role3</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name></work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>seq4</work:contributor-sequence>
<work:contributor-role>role4</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
</work:contributors>
</work:work>

12
pom.xml
View File

@ -458,6 +458,18 @@
<version>${jsonschemagenerator.version}</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-text</artifactId>
<version>${common.text.version}</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>${org.apache.httpcomponents.version}</version>
</dependency>
</dependencies>
</dependencyManagement>