orcid-no-doi #43
@@ -51,7 +51,6 @@
 <dependency>
 <groupId>org.apache.httpcomponents</groupId>
 <artifactId>httpclient</artifactId>
-<version>${org.apache.httpcomponents.version}</version>
 </dependency>
 <dependency>
 <groupId>eu.dnetlib.dhp</groupId>

@@ -87,7 +86,6 @@
 <dependency>
 <groupId>org.apache.commons</groupId>
 <artifactId>commons-text</artifactId>
-<version>${common.text.version}</version>
 </dependency>
 
 

@@ -62,7 +62,7 @@ public class OrcidDSManager {
 .toString(
 OrcidDSManager.class
 .getResourceAsStream(
-"/eu/dnetlib/dhp/doiboost/create_orcid_authors_data.json")));
+"/eu/dnetlib/dhp/doiboost/gen_orcid_authors_from_summaries.json")));
 parser.parseArgument(args);
 
 hdfsServerUri = parser.get("hdfsServerUri");

@@ -73,7 +73,7 @@ public class ActivitiesDumpReader {
 SequenceFile.Writer.valueClass(Text.class))) {
 while ((entry = tais.getNextTarEntry()) != null) {
 String filename = entry.getName();
-
+StringBuffer buffer = new StringBuffer();
 try {
 if (entry.isDirectory() || !filename.contains("works")) {
 

@@ -83,7 +83,7 @@ public class ActivitiesDumpReader {
 BufferedReader br = new BufferedReader(new InputStreamReader(tais)); // Read directly from
 // tarInput
 String line;
-StringBuffer buffer = new StringBuffer();
+buffer = new StringBuffer();
 while ((line = br.readLine()) != null) {
 buffer.append(line);
 }

@@ -42,7 +42,7 @@ public class GenOrcidAuthorWork extends OrcidDSManager {
 .toString(
 GenOrcidAuthorWork.class
 .getResourceAsStream(
-"/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works_parameters.json")));
+"/eu/dnetlib/dhp/doiboost/gen_orcid_works-no-doi_from_activities.json")));
 parser.parseArgument(args);
 
 hdfsServerUri = parser.get("hdfsServerUri");

@@ -67,7 +67,7 @@ public class SparkGenEnrichedOrcidWorks {
 JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
 
 JavaPairRDD<Text, Text> summariesRDD = sc
-.sequenceFile(workingPath + "summaries/output/authors.seq", Text.class, Text.class);
+.sequenceFile(workingPath + "authors/authors.seq", Text.class, Text.class);
 Dataset<AuthorData> summariesDataset = spark
 .createDataset(
 summariesRDD.map(seq -> loadAuthorFromJson(seq._1(), seq._2())).rdd(),

@@ -96,8 +96,8 @@ public class SparkGenEnrichedOrcidWorks {
 Encoders.tuple(Encoders.STRING(), Encoders.STRING()))
 .filter(Objects::nonNull)
 .toJavaRDD();
-enrichedWorksRDD.saveAsTextFile(workingPath + outputEnrichedWorksPath);
-logger.info("Works enriched data saved");
+// enrichedWorksRDD.saveAsTextFile(workingPath + outputEnrichedWorksPath);
+logger.info("Enriched works RDD ready.");
 
 final LongAccumulator parsedPublications = spark.sparkContext().longAccumulator("parsedPublications");
 final LongAccumulator enrichedPublications = spark

@@ -132,7 +132,7 @@ public class SparkGenEnrichedOrcidWorks {
 .write()
 .format("parquet")
 .mode(SaveMode.Overwrite)
-.save(workingPath + "no_doi_dataset/output");
+.save(workingPath + outputEnrichedWorksPath);
 
 logger.info("parsedPublications: " + parsedPublications.value().toString());
 logger.info("enrichedPublications: " + enrichedPublications.value().toString());
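Note on the Spark hunks above: the job now reads the authors sequence file from the new authors/ folder and persists the enriched works as parquet under the configurable outputEnrichedWorksPath instead of a text dump. A minimal sketch of that read/write pattern, with hypothetical stand-ins for the project's AuthorData bean and loadAuthorFromJson helper (only the paths and the parquet/Overwrite save come from the diff):

    import org.apache.hadoop.io.Text;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Encoders;
    import org.apache.spark.sql.SaveMode;
    import org.apache.spark.sql.SparkSession;

    public class AuthorsSeqSketch {

        // Hypothetical stand-in for the project's AuthorData model.
        public static class AuthorData implements java.io.Serializable {
            private String oid;
            public String getOid() { return oid; }
            public void setOid(String oid) { this.oid = oid; }
        }

        // Stand-in for the class's loadAuthorFromJson; the real one parses the JSON value.
        static AuthorData loadAuthorFromJson(Text key, Text value) {
            AuthorData a = new AuthorData();
            a.setOid(key.toString());
            return a;
        }

        static void run(SparkSession spark, String workingPath, String outputEnrichedWorksPath) {
            JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
            // Read the authors sequence file produced by the summaries step.
            JavaPairRDD<Text, Text> summariesRDD = sc
                .sequenceFile(workingPath + "authors/authors.seq", Text.class, Text.class);
            Dataset<AuthorData> summariesDataset = spark
                .createDataset(
                    summariesRDD.map(seq -> loadAuthorFromJson(seq._1(), seq._2())).rdd(),
                    Encoders.bean(AuthorData.class));
            // The real job enriches works against this dataset; here the dataset is simply
            // written back, mirroring the parquet/Overwrite save shown in the diff.
            summariesDataset
                .write()
                .format("parquet")
                .mode(SaveMode.Overwrite)
                .save(workingPath + outputEnrichedWorksPath);
        }
    }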
@@ -5,6 +5,7 @@ import java.io.IOException;
 import java.text.Normalizer;
 import java.util.*;
 
+import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.text.similarity.JaroWinklerSimilarity;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

@@ -40,7 +41,7 @@ public class AuthorMatcher {
 int matchCounter = 0;
 List<Integer> matchCounters = Arrays.asList(matchCounter);
 Contributor contributor = null;
-contributors.forEach(c -> {
+contributors.stream().filter(c -> !StringUtils.isBlank(c.getCreditName())).forEach(c -> {
 if (simpleMatch(c.getCreditName(), author.getName()) ||
 simpleMatch(c.getCreditName(), author.getSurname()) ||
 simpleMatch(c.getCreditName(), author.getOtherName())) {

@@ -54,6 +55,7 @@ public class AuthorMatcher {
 Optional<Contributor> optCon = contributors
 .stream()
 .filter(c -> c.isSimpleMatch())
+.filter(c -> !StringUtils.isBlank(c.getCreditName()))
 .map(c -> {
 c.setScore(bestMatch(author.getName(), author.getSurname(), c.getCreditName()));
 return c;
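The two AuthorMatcher hunks above add the same guard in both places: contributors whose credit name is blank are skipped before any simple or best-match similarity scoring. A minimal standalone sketch of that guard, assuming a simplified Contributor bean (the real class lives in the doiboost model package):

    import java.util.Arrays;
    import java.util.List;

    import org.apache.commons.lang3.StringUtils;

    public class BlankCreditNameFilterSketch {

        // Hypothetical stand-in for the project's Contributor model class.
        static class Contributor {
            private final String creditName;
            Contributor(String creditName) { this.creditName = creditName; }
            String getCreditName() { return creditName; }
        }

        public static void main(String[] args) {
            List<Contributor> contributors = Arrays.asList(
                new Contributor("Abdel-Dayem K"),
                new Contributor(""),    // blank entries like these are now skipped
                new Contributor(null));

            // Same pattern as the patch: drop blank credit names before matching.
            contributors
                .stream()
                .filter(c -> !StringUtils.isBlank(c.getCreditName()))
                .forEach(c -> System.out.println("would match against: " + c.getCreditName()));
        }
    }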
@@ -183,39 +183,34 @@ public class XMLRecordParserNoDoi {
 private static List<Contributor> getContributors(VTDGen vg, VTDNav vn, AutoPilot ap)
 throws XPathParseException, NavException, XPathEvalException {
 List<Contributor> contributors = new ArrayList<Contributor>();
-int nameIndex = 0;
-ap.selectXPath("//work:contributor/work:credit-name");
+ap.selectXPath("//work:contributors/work:contributor");
 while (ap.evalXPath() != -1) {
 Contributor contributor = new Contributor();
-int t = vn.getText();
-if (t >= 0) {
-contributor.setCreditName(vn.toNormalizedString(t));
-contributors.add(nameIndex, contributor);
-nameIndex++;
+if (vn.toElement(VTDNav.FIRST_CHILD, "work:credit-name")) {
+int val = vn.getText();
+if (val != -1) {
+contributor.setCreditName(vn.toNormalizedString(val));
+}
+vn.toElement(VTDNav.PARENT);
 }
-}
-if (contributors.size() == 0) {
-return contributors;
-}
-
-int sequenceIndex = 0;
-ap.selectXPath("//work:contributor/work:contributor-attributes/work:contributor-sequence");
-while (ap.evalXPath() != -1) {
-int t = vn.getText();
-if (t >= 0) {
-contributors.get(sequenceIndex).setSequence(vn.toNormalizedString(t));
-sequenceIndex++;
+if (vn.toElement(VTDNav.FIRST_CHILD, "work:contributor-attributes")) {
+if (vn.toElement(VTDNav.FIRST_CHILD, "work:contributor-sequence")) {
+int val = vn.getText();
+if (val != -1) {
+contributor.setSequence(vn.toNormalizedString(val));
+}
+vn.toElement(VTDNav.PARENT);
+}
+if (vn.toElement(VTDNav.FIRST_CHILD, "work:contributor-role")) {
+int val = vn.getText();
+if (val != -1) {
+contributor.setRole(vn.toNormalizedString(val));
 }
-}
-
-int roleIndex = 0;
-ap.selectXPath("//work:contributor/work:contributor-attributes/work:contributor-role");
-while (ap.evalXPath() != -1) {
-int t = vn.getText();
-if (t >= 0) {
-contributors.get(roleIndex).setRole(vn.toNormalizedString(t));
-roleIndex++;
-}
+vn.toElement(VTDNav.PARENT);
+}
+vn.toElement(VTDNav.PARENT);
+}
+contributors.add(contributor);
 }
 return contributors;
 }
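The rewritten getContributors walks each work:contributor node once and descends into its child elements with VTDNav.toElement(FIRST_CHILD, name) / toElement(PARENT), instead of running three separate XPath passes kept in sync by index counters. A compact sketch of that VTD-XML navigation pattern (namespace URI and element names as in the ORCID activity records of this PR; checked-exception handling trimmed to a generic throws):

    import com.ximpleware.AutoPilot;
    import com.ximpleware.VTDGen;
    import com.ximpleware.VTDNav;

    public class ContributorNavSketch {

        // Prints credit-name and role for every work:contributor in an ORCID work record.
        public static void parse(byte[] xml) throws Exception {
            VTDGen vg = new VTDGen();
            vg.setDoc(xml);
            vg.parse(true); // namespace-aware parse
            VTDNav vn = vg.getNav();
            AutoPilot ap = new AutoPilot(vn);
            ap.declareXPathNameSpace("work", "http://www.orcid.org/ns/work");
            ap.selectXPath("//work:contributors/work:contributor");
            while (ap.evalXPath() != -1) {
                String creditName = childText(vn, "work:credit-name");
                String role = null;
                if (vn.toElement(VTDNav.FIRST_CHILD, "work:contributor-attributes")) {
                    role = childText(vn, "work:contributor-role");
                    vn.toElement(VTDNav.PARENT); // back to work:contributor
                }
                System.out.println(creditName + " / " + role);
            }
        }

        // Reads the text of a direct child element, restoring the cursor afterwards.
        private static String childText(VTDNav vn, String element) throws Exception {
            String text = null;
            if (vn.toElement(VTDNav.FIRST_CHILD, element)) {
                int t = vn.getText();
                if (t != -1) {
                    text = vn.toNormalizedString(t);
                }
                vn.toElement(VTDNav.PARENT);
            }
            return text;
        }
    }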
@@ -0,0 +1,7 @@
+[
+{"paramName":"n", "paramLongName":"hdfsServerUri", "paramDescription": "the server uri", "paramRequired": true},
+{"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the default work path", "paramRequired": true},
+{"paramName":"f", "paramLongName":"activitiesFileNameTarGz", "paramDescription": "the name of the activities orcid file", "paramRequired": true},
+{"paramName":"ow", "paramLongName":"outputWorksPath", "paramDescription": "the relative folder of the sequencial file to write", "paramRequired": true},
+{"paramName":"oew", "paramLongName":"outputEnrichedWorksPath", "paramDescription": "the relative folder of the sequencial file to write the data", "paramRequired": true}
+]

@@ -1,42 +0,0 @@
-<configuration>
-<property>
-<name>jobTracker</name>
-<value>hadoop-rm3.garr-pa1.d4science.org:8032</value>
-</property>
-<property>
-<name>nameNode</name>
-<value>hdfs://hadoop-rm1.garr-pa1.d4science.org:8020</value>
-</property>
-<property>
-<name>oozie.use.system.libpath</name>
-<value>true</value>
-</property>
-<property>
-<name>oozie.action.sharelib.for.spark</name>
-<value>spark2</value>
-</property>
-<property>
-<name>oozie.launcher.mapreduce.user.classpath.first</name>
-<value>true</value>
-</property>
-<property>
-<name>hive_metastore_uris</name>
-<value>thrift://hadoop-edge2.garr-pa1.d4science.org:9083</value>
-</property>
-<property>
-<name>spark2YarnHistoryServerAddress</name>
-<value>http://hadoop-edge1.garr-pa1.d4science.org:18089/</value>
-</property>
-<property>
-<name>spark2EventLogDir</name>
-<value>/user/spark/spark2ApplicationHistory</value>
-</property>
-<property>
-<name>spark2ExtraListeners</name>
-<value>"com.cloudera.spark.lineage.NavigatorAppListener"</value>
-</property>
-<property>
-<name>spark2SqlQueryExecutionListeners</name>
-<value>"com.cloudera.spark.lineage.NavigatorQueryListener"</value>
-</property>
-</configuration>

@@ -1,67 +0,0 @@
-<workflow-app name="Import Orcid Summaries" xmlns="uri:oozie:workflow:0.5">
-<parameters>
-<property>
-<name>workingPath</name>
-<description>the working dir base path</description>
-</property>
-<property>
-<name>shell_cmd_0</name>
-<value>wget -O /tmp/ORCID_2019_summaries.tar.gz https://orcid.figshare.com/ndownloader/files/18017633 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_summaries.tar.gz /data/orcid_activities/ORCID_2019_summaries.tar.gz ; rm -f /tmp/ORCID_2019_summaries.tar.gz
-</value>
-<description>the shell command that downloads and puts to hdfs orcid summaries</description>
-</property>
-</parameters>
-
-<start to="ResetWorkingPath"/>
-
-
-<kill name="Kill">
-<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
-</kill>
-
-<action name="ResetWorkingPath">
-<fs>
-<delete path='${workingPath}/summaries/output'/>
-<mkdir path='${workingPath}/summaries/output'/>
-</fs>
-<ok to="check_exist_on_hdfs_summaries"/>
-<error to="Kill"/>
-</action>
-
-<decision name="check_exist_on_hdfs_summaries">
-<switch>
-<case to="ImportOrcidSummaries">
-${fs:exists(concat(workingPath,'/ORCID_2019_summaries.tar.gz'))}
-</case>
-<default to="DownloadSummaries" />
-</switch>
-</decision>
-
-<action name="DownloadSummaries">
-<shell xmlns="uri:oozie:shell-action:0.1">
-<job-tracker>${jobTracker}</job-tracker>
-<name-node>${nameNode}</name-node>
-<exec>bash</exec>
-<argument>-c</argument>
-<argument>${shell_cmd_0}</argument>
-<capture-output/>
-</shell>
-<ok to="ImportOrcidSummaries"/>
-<error to="Kill"/>
-</action>
-
-<action name="ImportOrcidSummaries">
-<java>
-<job-tracker>${jobTracker}</job-tracker>
-<name-node>${nameNode}</name-node>
-<main-class>eu.dnetlib.doiboost.orcid.OrcidDSManager</main-class>
-<arg>-w</arg><arg>${workingPath}/</arg>
-<arg>-n</arg><arg>${nameNode}</arg>
-<arg>-f</arg><arg>ORCID_2019_summaries.tar.gz</arg>
-<arg>-o</arg><arg>summaries/output/</arg>
-</java>
-<ok to="End"/>
-<error to="Kill"/>
-</action>
-<end name="End"/>
-</workflow-app>
@@ -9,7 +9,7 @@
 </property>
 <property>
 <name>oozie.launcher.mapreduce.map.java.opts</name>
-<value>-Xmx4g</value>
+<value>-Xmx2g</value>
 </property>
 <property>
 <name>jobTracker</name>

@@ -1,4 +1,4 @@
-<workflow-app name="Import Orcid Activities" xmlns="uri:oozie:workflow:0.5">
+<workflow-app name="Gen Orcid Works-no-doi From Activities" xmlns="uri:oozie:workflow:0.5">
 <parameters>
 <property>
 <name>workingPath</name>

@@ -6,70 +6,70 @@
 </property>
 <property>
 <name>shell_cmd_0</name>
-<value>wget -O /tmp/ORCID_2019_activites_0.tar.gz https://orcid.figshare.com/ndownloader/files/18017660 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_0.tar.gz /data/orcid_activities/ORCID_2019_activites_0.tar.gz ; rm -f /tmp/ORCID_2019_activites_0.tar.gz
+<value>wget -O /tmp/ORCID_2020_10_activites_0.tar.gz https://orcid.figshare.com/ndownloader/files/25002232 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_0.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_0.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_0.tar.gz
 </value>
 <description>the shell command that downloads and puts to hdfs orcid activity file 0</description>
 </property>
 <property>
 <name>shell_cmd_1</name>
-<value>wget -O /tmp/ORCID_2019_activites_1.tar.gz https://orcid.figshare.com/ndownloader/files/18017675 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_1.tar.gz /data/orcid_activities/ORCID_2019_activites_1.tar.gz ; rm -f /tmp/ORCID_2019_activites_1.tar.gz
+<value>wget -O /tmp/ORCID_2020_10_activites_1.tar.gz https://orcid.figshare.com/ndownloader/files/25002088 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_1.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_1.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_1.tar.gz
 </value>
 <description>the shell command that downloads and puts to hdfs orcid activity file 1</description>
 </property>
 <property>
 <name>shell_cmd_2</name>
-<value>wget -O /tmp/ORCID_2019_activites_2.tar.gz https://orcid.figshare.com/ndownloader/files/18017717 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_2.tar.gz /data/orcid_activities/ORCID_2019_activites_2.tar.gz ; rm -f /tmp/ORCID_2019_activites_2.tar.gz
+<value>wget -O /tmp/ORCID_2020_10_activites_2.tar.gz https://orcid.figshare.com/ndownloader/files/25000596 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_2.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_2.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_2.tar.gz
 </value>
 <description>the shell command that downloads and puts to hdfs orcid activity file 2</description>
 </property>
 <property>
 <name>shell_cmd_3</name>
-<value>wget -O /tmp/ORCID_2019_activites_3.tar.gz https://orcid.figshare.com/ndownloader/files/18017765 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_3.tar.gz /data/orcid_activities/ORCID_2019_activites_3.tar.gz ; rm -f /tmp/ORCID_2019_activites_3.tar.gz
+<value>wget -O /tmp/ORCID_2020_10_activites_3.tar.gz https://orcid.figshare.com/ndownloader/files/25015150 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_3.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_3.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_3.tar.gz
 </value>
 <description>the shell command that downloads and puts to hdfs orcid activity file 3</description>
 </property>
 <property>
 <name>shell_cmd_4</name>
-<value>wget -O /tmp/ORCID_2019_activites_4.tar.gz https://orcid.figshare.com/ndownloader/files/18017831 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_4.tar.gz /data/orcid_activities/ORCID_2019_activites_4.tar.gz ; rm -f /tmp/ORCID_2019_activites_4.tar.gz
+<value>wget -O /tmp/ORCID_2020_10_activites_4.tar.gz https://orcid.figshare.com/ndownloader/files/25033643 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_4.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_4.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_4.tar.gz
 </value>
 <description>the shell command that downloads and puts to hdfs orcid activity file 4</description>
 </property>
 <property>
 <name>shell_cmd_5</name>
-<value>wget -O /tmp/ORCID_2019_activites_5.tar.gz https://orcid.figshare.com/ndownloader/files/18017987 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_5.tar.gz /data/orcid_activities/ORCID_2019_activites_5.tar.gz ; rm -f /tmp/ORCID_2019_activites_5.tar.gz
+<value>wget -O /tmp/ORCID_2020_10_activites_5.tar.gz https://orcid.figshare.com/ndownloader/files/25005483 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_5.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_5.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_5.tar.gz
 </value>
 <description>the shell command that downloads and puts to hdfs orcid activity file 5</description>
 </property>
 <property>
 <name>shell_cmd_6</name>
-<value>wget -O /tmp/ORCID_2019_activites_6.tar.gz https://orcid.figshare.com/ndownloader/files/18018053 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_6.tar.gz /data/orcid_activities/ORCID_2019_activites_6.tar.gz ; rm -f /tmp/ORCID_2019_activites_6.tar.gz
+<value>wget -O /tmp/ORCID_2020_10_activites_6.tar.gz https://orcid.figshare.com/ndownloader/files/25005425 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_6.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_6.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_6.tar.gz
 </value>
 <description>the shell command that downloads and puts to hdfs orcid activity file 6</description>
 </property>
 <property>
 <name>shell_cmd_7</name>
-<value>wget -O /tmp/ORCID_2019_activites_7.tar.gz https://orcid.figshare.com/ndownloader/files/18018023 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_7.tar.gz /data/orcid_activities/ORCID_2019_activites_7.tar.gz ; rm -f /tmp/ORCID_2019_activites_7.tar.gz
+<value>wget -O /tmp/ORCID_2020_10_activites_7.tar.gz https://orcid.figshare.com/ndownloader/files/25012016 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_7.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_7.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_7.tar.gz
 </value>
 <description>the shell command that downloads and puts to hdfs orcid activity file 7</description>
 </property>
 <property>
 <name>shell_cmd_8</name>
-<value>wget -O /tmp/ORCID_2019_activites_8.tar.gz https://orcid.figshare.com/ndownloader/files/18018248 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_8.tar.gz /data/orcid_activities/ORCID_2019_activites_8.tar.gz ; rm -f /tmp/ORCID_2019_activites_8.tar.gz
+<value>wget -O /tmp/ORCID_2020_10_activites_8.tar.gz https://orcid.figshare.com/ndownloader/files/25012079 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_8.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_8.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_8.tar.gz
 </value>
 <description>the shell command that downloads and puts to hdfs orcid activity file 8</description>
 </property>
 <property>
 <name>shell_cmd_9</name>
-<value>wget -O /tmp/ORCID_2019_activites_9.tar.gz https://orcid.figshare.com/ndownloader/files/18018029 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_9.tar.gz /data/orcid_activities/ORCID_2019_activites_9.tar.gz ; rm -f /tmp/ORCID_2019_activites_9.tar.gz
+<value>wget -O /tmp/ORCID_2020_10_activites_9.tar.gz https://orcid.figshare.com/ndownloader/files/25010727 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_9.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_9.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_9.tar.gz
 </value>
 <description>the shell command that downloads and puts to hdfs orcid activity file 9</description>
 </property>
 <property>
 <name>shell_cmd_X</name>
-<value>wget -O /tmp/ORCID_2019_activites_X.tar.gz https://orcid.figshare.com/ndownloader/files/18018182 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_X.tar.gz /data/orcid_activities/ORCID_2019_activites_X.tar.gz ; rm -f /tmp/ORCID_2019_activites_X.tar.gz
+<value>wget -O /tmp/ORCID_2020_10_activites_X.tar.gz https://orcid.figshare.com/ndownloader/files/25011025 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_X.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_X.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_X.tar.gz
 </value>
 <description>the shell command that downloads and puts to hdfs orcid activity file X</description>
 </property>
 </parameters>
 
 <start to="ResetWorkingPath"/>
|
@ -82,11 +82,11 @@
|
||||||
<fs>
|
<fs>
|
||||||
<delete path='${workingPath}/no_doi_works/*'/>
|
<delete path='${workingPath}/no_doi_works/*'/>
|
||||||
</fs>
|
</fs>
|
||||||
<ok to="fork_gen_orcid_author_work"/>
|
<ok to="fork_check_download_files"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<fork name = "fork_gen_orcid_author_work">
|
<fork name = "fork_check_download_files">
|
||||||
<path start = "check_exist_on_hdfs_activities_0"/>
|
<path start = "check_exist_on_hdfs_activities_0"/>
|
||||||
<path start = "check_exist_on_hdfs_activities_1"/>
|
<path start = "check_exist_on_hdfs_activities_1"/>
|
||||||
<path start = "check_exist_on_hdfs_activities_2"/>
|
<path start = "check_exist_on_hdfs_activities_2"/>
|
||||||
|
@ -102,8 +102,8 @@
|
||||||
|
|
||||||
<decision name="check_exist_on_hdfs_activities_0">
|
<decision name="check_exist_on_hdfs_activities_0">
|
||||||
<switch>
|
<switch>
|
||||||
<case to="GenOrcidAuthorWork_0">
|
<case to="wait_download_phase_node">
|
||||||
${fs:exists(concat(workingPath,'/ORCID_2019_activites_0.tar.gz'))}
|
${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_0.tar.gz'))}
|
||||||
</case>
|
</case>
|
||||||
<default to="Download_0" />
|
<default to="Download_0" />
|
||||||
</switch>
|
</switch>
|
||||||
|
@ -118,7 +118,7 @@
|
||||||
<argument>${shell_cmd_0}</argument>
|
<argument>${shell_cmd_0}</argument>
|
||||||
<capture-output/>
|
<capture-output/>
|
||||||
</shell>
|
</shell>
|
||||||
<ok to="GenOrcidAuthorWork_0"/>
|
<ok to="wait_download_phase_node"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
@ -129,7 +129,7 @@
|
||||||
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
|
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
|
||||||
<arg>-w</arg><arg>${workingPath}/</arg>
|
<arg>-w</arg><arg>${workingPath}/</arg>
|
||||||
<arg>-n</arg><arg>${nameNode}</arg>
|
<arg>-n</arg><arg>${nameNode}</arg>
|
||||||
<arg>-f</arg><arg>ORCID_2019_activites_0.tar.gz</arg>
|
<arg>-f</arg><arg>ORCID_2020_10_activites_0.tar.gz</arg>
|
||||||
<arg>-ow</arg><arg>no_doi_works/works_0.seq</arg>
|
<arg>-ow</arg><arg>no_doi_works/works_0.seq</arg>
|
||||||
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
|
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
|
||||||
</java>
|
</java>
|
||||||
|
@ -139,8 +139,8 @@
|
||||||
|
|
||||||
<decision name="check_exist_on_hdfs_activities_1">
|
<decision name="check_exist_on_hdfs_activities_1">
|
||||||
<switch>
|
<switch>
|
||||||
<case to="GenOrcidAuthorWork_1">
|
<case to="wait_download_phase_node">
|
||||||
${fs:exists(concat(workingPath,'/ORCID_2019_activites_1.tar.gz'))}
|
${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_1.tar.gz'))}
|
||||||
</case>
|
</case>
|
||||||
<default to="Download_1" />
|
<default to="Download_1" />
|
||||||
</switch>
|
</switch>
|
||||||
|
@ -155,7 +155,7 @@
|
||||||
<argument>${shell_cmd_1}</argument>
|
<argument>${shell_cmd_1}</argument>
|
||||||
<capture-output/>
|
<capture-output/>
|
||||||
</shell>
|
</shell>
|
||||||
<ok to="GenOrcidAuthorWork_1"/>
|
<ok to="wait_download_phase_node"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
@ -166,7 +166,7 @@
|
||||||
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
|
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
|
||||||
<arg>-w</arg><arg>${workingPath}/</arg>
|
<arg>-w</arg><arg>${workingPath}/</arg>
|
||||||
<arg>-n</arg><arg>${nameNode}</arg>
|
<arg>-n</arg><arg>${nameNode}</arg>
|
||||||
<arg>-f</arg><arg>ORCID_2019_activites_1.tar.gz</arg>
|
<arg>-f</arg><arg>ORCID_2020_10_activites_1.tar.gz</arg>
|
||||||
<arg>-ow</arg><arg>no_doi_works/works_1.seq</arg>
|
<arg>-ow</arg><arg>no_doi_works/works_1.seq</arg>
|
||||||
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
|
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
|
||||||
</java>
|
</java>
|
||||||
|
@ -176,8 +176,8 @@
|
||||||
|
|
||||||
<decision name="check_exist_on_hdfs_activities_2">
|
<decision name="check_exist_on_hdfs_activities_2">
|
||||||
<switch>
|
<switch>
|
||||||
<case to="GenOrcidAuthorWork_2">
|
<case to="wait_download_phase_node">
|
||||||
${fs:exists(concat(workingPath,'/ORCID_2019_activites_2.tar.gz'))}
|
${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_2.tar.gz'))}
|
||||||
</case>
|
</case>
|
||||||
<default to="Download_2" />
|
<default to="Download_2" />
|
||||||
</switch>
|
</switch>
|
||||||
|
@ -192,7 +192,7 @@
|
||||||
<argument>${shell_cmd_2}</argument>
|
<argument>${shell_cmd_2}</argument>
|
||||||
<capture-output/>
|
<capture-output/>
|
||||||
</shell>
|
</shell>
|
||||||
<ok to="GenOrcidAuthorWork_2"/>
|
<ok to="wait_download_phase_node"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
@ -203,7 +203,7 @@
|
||||||
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
|
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
|
||||||
<arg>-w</arg><arg>${workingPath}/</arg>
|
<arg>-w</arg><arg>${workingPath}/</arg>
|
||||||
<arg>-n</arg><arg>${nameNode}</arg>
|
<arg>-n</arg><arg>${nameNode}</arg>
|
||||||
<arg>-f</arg><arg>ORCID_2019_activites_2.tar.gz</arg>
|
<arg>-f</arg><arg>ORCID_2020_10_activites_2.tar.gz</arg>
|
||||||
<arg>-ow</arg><arg>no_doi_works/works_2.seq</arg>
|
<arg>-ow</arg><arg>no_doi_works/works_2.seq</arg>
|
||||||
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
|
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
|
||||||
</java>
|
</java>
|
||||||
|
@ -213,8 +213,8 @@
|
||||||
|
|
||||||
<decision name="check_exist_on_hdfs_activities_3">
|
<decision name="check_exist_on_hdfs_activities_3">
|
||||||
<switch>
|
<switch>
|
||||||
<case to="GenOrcidAuthorWork_3">
|
<case to="wait_download_phase_node">
|
||||||
${fs:exists(concat(workingPath,'/ORCID_2019_activites_3.tar.gz'))}
|
${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_3.tar.gz'))}
|
||||||
</case>
|
</case>
|
||||||
<default to="Download_3" />
|
<default to="Download_3" />
|
||||||
</switch>
|
</switch>
|
||||||
|
@ -229,7 +229,7 @@
|
||||||
<argument>${shell_cmd_3}</argument>
|
<argument>${shell_cmd_3}</argument>
|
||||||
<capture-output/>
|
<capture-output/>
|
||||||
</shell>
|
</shell>
|
||||||
<ok to="GenOrcidAuthorWork_3"/>
|
<ok to="wait_download_phase_node"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
@ -240,7 +240,7 @@
|
||||||
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
|
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
|
||||||
<arg>-w</arg><arg>${workingPath}/</arg>
|
<arg>-w</arg><arg>${workingPath}/</arg>
|
||||||
<arg>-n</arg><arg>${nameNode}</arg>
|
<arg>-n</arg><arg>${nameNode}</arg>
|
||||||
<arg>-f</arg><arg>ORCID_2019_activites_3.tar.gz</arg>
|
<arg>-f</arg><arg>ORCID_2020_10_activites_3.tar.gz</arg>
|
||||||
<arg>-ow</arg><arg>no_doi_works/works_3.seq</arg>
|
<arg>-ow</arg><arg>no_doi_works/works_3.seq</arg>
|
||||||
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
|
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
|
||||||
</java>
|
</java>
|
||||||
|
@ -250,8 +250,8 @@
|
||||||
|
|
||||||
<decision name="check_exist_on_hdfs_activities_4">
|
<decision name="check_exist_on_hdfs_activities_4">
|
||||||
<switch>
|
<switch>
|
||||||
<case to="GenOrcidAuthorWork_4">
|
<case to="wait_download_phase_node">
|
||||||
${fs:exists(concat(workingPath,'/ORCID_2019_activites_4.tar.gz'))}
|
${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_4.tar.gz'))}
|
||||||
</case>
|
</case>
|
||||||
<default to="Download_4" />
|
<default to="Download_4" />
|
||||||
</switch>
|
</switch>
|
||||||
|
@ -266,7 +266,7 @@
|
||||||
<argument>${shell_cmd_4}</argument>
|
<argument>${shell_cmd_4}</argument>
|
||||||
<capture-output/>
|
<capture-output/>
|
||||||
</shell>
|
</shell>
|
||||||
<ok to="GenOrcidAuthorWork_4"/>
|
<ok to="wait_download_phase_node"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
@ -277,7 +277,7 @@
|
||||||
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
|
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
|
||||||
<arg>-w</arg><arg>${workingPath}/</arg>
|
<arg>-w</arg><arg>${workingPath}/</arg>
|
||||||
<arg>-n</arg><arg>${nameNode}</arg>
|
<arg>-n</arg><arg>${nameNode}</arg>
|
||||||
<arg>-f</arg><arg>ORCID_2019_activites_4.tar.gz</arg>
|
<arg>-f</arg><arg>ORCID_2020_10_activites_4.tar.gz</arg>
|
||||||
<arg>-ow</arg><arg>no_doi_works/works_4.seq</arg>
|
<arg>-ow</arg><arg>no_doi_works/works_4.seq</arg>
|
||||||
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
|
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
|
||||||
</java>
|
</java>
|
||||||
|
@ -287,8 +287,8 @@
|
||||||
|
|
||||||
<decision name="check_exist_on_hdfs_activities_5">
|
<decision name="check_exist_on_hdfs_activities_5">
|
||||||
<switch>
|
<switch>
|
||||||
<case to="GenOrcidAuthorWork_5">
|
<case to="wait_download_phase_node">
|
||||||
${fs:exists(concat(workingPath,'/ORCID_2019_activites_5.tar.gz'))}
|
${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_5.tar.gz'))}
|
||||||
</case>
|
</case>
|
||||||
<default to="Download_5" />
|
<default to="Download_5" />
|
||||||
</switch>
|
</switch>
|
||||||
|
@ -303,7 +303,7 @@
|
||||||
<argument>${shell_cmd_5}</argument>
|
<argument>${shell_cmd_5}</argument>
|
||||||
<capture-output/>
|
<capture-output/>
|
||||||
</shell>
|
</shell>
|
||||||
<ok to="GenOrcidAuthorWork_5"/>
|
<ok to="wait_download_phase_node"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
@ -314,7 +314,7 @@
|
||||||
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
|
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
|
||||||
<arg>-w</arg><arg>${workingPath}/</arg>
|
<arg>-w</arg><arg>${workingPath}/</arg>
|
||||||
<arg>-n</arg><arg>${nameNode}</arg>
|
<arg>-n</arg><arg>${nameNode}</arg>
|
||||||
<arg>-f</arg><arg>ORCID_2019_activites_5.tar.gz</arg>
|
<arg>-f</arg><arg>ORCID_2020_10_activites_5.tar.gz</arg>
|
||||||
<arg>-ow</arg><arg>no_doi_works/works_5.seq</arg>
|
<arg>-ow</arg><arg>no_doi_works/works_5.seq</arg>
|
||||||
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
|
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
|
||||||
</java>
|
</java>
|
||||||
|
@ -324,8 +324,8 @@
|
||||||
|
|
||||||
<decision name="check_exist_on_hdfs_activities_6">
|
<decision name="check_exist_on_hdfs_activities_6">
|
||||||
<switch>
|
<switch>
|
||||||
<case to="GenOrcidAuthorWork_6">
|
<case to="wait_download_phase_node">
|
||||||
${fs:exists(concat(workingPath,'/ORCID_2019_activites_6.tar.gz'))}
|
${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_6.tar.gz'))}
|
||||||
</case>
|
</case>
|
||||||
<default to="Download_6" />
|
<default to="Download_6" />
|
||||||
</switch>
|
</switch>
|
||||||
|
@ -340,7 +340,7 @@
|
||||||
<argument>${shell_cmd_6}</argument>
|
<argument>${shell_cmd_6}</argument>
|
||||||
<capture-output/>
|
<capture-output/>
|
||||||
</shell>
|
</shell>
|
||||||
<ok to="GenOrcidAuthorWork_6"/>
|
<ok to="wait_download_phase_node"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
@ -351,7 +351,7 @@
|
||||||
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
|
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
|
||||||
<arg>-w</arg><arg>${workingPath}/</arg>
|
<arg>-w</arg><arg>${workingPath}/</arg>
|
||||||
<arg>-n</arg><arg>${nameNode}</arg>
|
<arg>-n</arg><arg>${nameNode}</arg>
|
||||||
<arg>-f</arg><arg>ORCID_2019_activites_6.tar.gz</arg>
|
<arg>-f</arg><arg>ORCID_2020_10_activites_6.tar.gz</arg>
|
||||||
<arg>-ow</arg><arg>no_doi_works/works_6.seq</arg>
|
<arg>-ow</arg><arg>no_doi_works/works_6.seq</arg>
|
||||||
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
|
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
|
||||||
</java>
|
</java>
|
||||||
|
@ -362,8 +362,8 @@
|
||||||
|
|
||||||
<decision name="check_exist_on_hdfs_activities_7">
|
<decision name="check_exist_on_hdfs_activities_7">
|
||||||
<switch>
|
<switch>
|
||||||
<case to="GenOrcidAuthorWork_7">
|
<case to="wait_download_phase_node">
|
||||||
${fs:exists(concat(workingPath,'/ORCID_2019_activites_7.tar.gz'))}
|
${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_7.tar.gz'))}
|
||||||
</case>
|
</case>
|
||||||
<default to="Download_7" />
|
<default to="Download_7" />
|
||||||
</switch>
|
</switch>
|
||||||
|
@ -378,7 +378,7 @@
|
||||||
<argument>${shell_cmd_7}</argument>
|
<argument>${shell_cmd_7}</argument>
|
||||||
<capture-output/>
|
<capture-output/>
|
||||||
</shell>
|
</shell>
|
||||||
<ok to="GenOrcidAuthorWork_7"/>
|
<ok to="wait_download_phase_node"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
@ -389,7 +389,7 @@
|
||||||
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
|
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
|
||||||
<arg>-w</arg><arg>${workingPath}/</arg>
|
<arg>-w</arg><arg>${workingPath}/</arg>
|
||||||
<arg>-n</arg><arg>${nameNode}</arg>
|
<arg>-n</arg><arg>${nameNode}</arg>
|
||||||
<arg>-f</arg><arg>ORCID_2019_activites_7.tar.gz</arg>
|
<arg>-f</arg><arg>ORCID_2020_10_activites_7.tar.gz</arg>
|
||||||
<arg>-ow</arg><arg>no_doi_works/works_7.seq</arg>
|
<arg>-ow</arg><arg>no_doi_works/works_7.seq</arg>
|
||||||
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
|
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
|
||||||
</java>
|
</java>
|
||||||
|
@ -399,8 +399,8 @@
|
||||||
|
|
||||||
<decision name="check_exist_on_hdfs_activities_8">
|
<decision name="check_exist_on_hdfs_activities_8">
|
||||||
<switch>
|
<switch>
|
||||||
<case to="GenOrcidAuthorWork_8">
|
<case to="wait_download_phase_node">
|
||||||
${fs:exists(concat(workingPath,'/ORCID_2019_activites_8.tar.gz'))}
|
${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_8.tar.gz'))}
|
||||||
</case>
|
</case>
|
||||||
<default to="Download_8" />
|
<default to="Download_8" />
|
||||||
</switch>
|
</switch>
|
||||||
|
@ -415,7 +415,7 @@
|
||||||
<argument>${shell_cmd_8}</argument>
|
<argument>${shell_cmd_8}</argument>
|
||||||
<capture-output/>
|
<capture-output/>
|
||||||
</shell>
|
</shell>
|
||||||
<ok to="GenOrcidAuthorWork_8"/>
|
<ok to="wait_download_phase_node"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
@ -426,7 +426,7 @@
|
||||||
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
|
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
|
||||||
<arg>-w</arg><arg>${workingPath}/</arg>
|
<arg>-w</arg><arg>${workingPath}/</arg>
|
||||||
<arg>-n</arg><arg>${nameNode}</arg>
|
<arg>-n</arg><arg>${nameNode}</arg>
|
||||||
<arg>-f</arg><arg>ORCID_2019_activites_8.tar.gz</arg>
|
<arg>-f</arg><arg>ORCID_2020_10_activites_8.tar.gz</arg>
|
||||||
<arg>-ow</arg><arg>no_doi_works/works_8.seq</arg>
|
<arg>-ow</arg><arg>no_doi_works/works_8.seq</arg>
|
||||||
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
|
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
|
||||||
</java>
|
</java>
|
||||||
|
@ -436,8 +436,8 @@
|
||||||
|
|
||||||
<decision name="check_exist_on_hdfs_activities_9">
|
<decision name="check_exist_on_hdfs_activities_9">
|
||||||
<switch>
|
<switch>
|
||||||
<case to="GenOrcidAuthorWork_9">
|
<case to="wait_download_phase_node">
|
||||||
${fs:exists(concat(workingPath,'/ORCID_2019_activites_9.tar.gz'))}
|
${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_9.tar.gz'))}
|
||||||
</case>
|
</case>
|
||||||
<default to="Download_9" />
|
<default to="Download_9" />
|
||||||
</switch>
|
</switch>
|
||||||
|
@ -452,7 +452,7 @@
|
||||||
<argument>${shell_cmd_9}</argument>
|
<argument>${shell_cmd_9}</argument>
|
||||||
<capture-output/>
|
<capture-output/>
|
||||||
</shell>
|
</shell>
|
||||||
<ok to="GenOrcidAuthorWork_9"/>
|
<ok to="wait_download_phase_node"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
@ -463,7 +463,7 @@
|
||||||
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
|
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
|
||||||
<arg>-w</arg><arg>${workingPath}/</arg>
|
<arg>-w</arg><arg>${workingPath}/</arg>
|
||||||
<arg>-n</arg><arg>${nameNode}</arg>
|
<arg>-n</arg><arg>${nameNode}</arg>
|
||||||
<arg>-f</arg><arg>ORCID_2019_activites_9.tar.gz</arg>
|
<arg>-f</arg><arg>ORCID_2020_10_activites_9.tar.gz</arg>
|
||||||
<arg>-ow</arg><arg>no_doi_works/works_9.seq</arg>
|
<arg>-ow</arg><arg>no_doi_works/works_9.seq</arg>
|
||||||
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
|
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
|
||||||
</java>
|
</java>
|
||||||
|
@ -473,8 +473,8 @@
|
||||||
|
|
||||||
<decision name="check_exist_on_hdfs_activities_X">
|
<decision name="check_exist_on_hdfs_activities_X">
|
||||||
<switch>
|
<switch>
|
||||||
<case to="GenOrcidAuthorWork_X">
|
<case to="wait_download_phase_node">
|
||||||
${fs:exists(concat(workingPath,'/ORCID_2019_activites_X.tar.gz'))}
|
${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_X.tar.gz'))}
|
||||||
</case>
|
</case>
|
||||||
<default to="Download_X" />
|
<default to="Download_X" />
|
||||||
</switch>
|
</switch>
|
||||||
|
@ -489,7 +489,7 @@
|
||||||
<argument>${shell_cmd_X}</argument>
|
<argument>${shell_cmd_X}</argument>
|
||||||
<capture-output/>
|
<capture-output/>
|
||||||
</shell>
|
</shell>
|
||||||
<ok to="GenOrcidAuthorWork_X"/>
|
<ok to="wait_download_phase_node"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
@ -500,7 +500,7 @@
|
||||||
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
|
<main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
|
||||||
<arg>-w</arg><arg>${workingPath}/</arg>
|
<arg>-w</arg><arg>${workingPath}/</arg>
|
||||||
<arg>-n</arg><arg>${nameNode}</arg>
|
<arg>-n</arg><arg>${nameNode}</arg>
|
||||||
<arg>-f</arg><arg>ORCID_2019_activites_X.tar.gz</arg>
|
<arg>-f</arg><arg>ORCID_2020_10_activites_X.tar.gz</arg>
|
||||||
<arg>-ow</arg><arg>no_doi_works/works_X.seq</arg>
|
<arg>-ow</arg><arg>no_doi_works/works_X.seq</arg>
|
||||||
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
|
<arg>-oew</arg><arg>no_doi_enriched_works/</arg>
|
||||||
</java>
|
</java>
|
||||||
|
@ -508,7 +508,35 @@
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
<join name = "wait_download_phase_node" to = "fork_gen_orcid_author_work"/>
|
||||||
|
|
||||||
|
<fork name = "fork_gen_orcid_author_work">
|
||||||
|
<path start = "GenOrcidAuthorWork_0"/>
|
||||||
|
<path start = "GenOrcidAuthorWork_1"/>
|
||||||
|
<path start = "GenOrcidAuthorWork_2"/>
|
||||||
|
<path start = "GenOrcidAuthorWork_3"/>
|
||||||
|
<path start = "GenOrcidAuthorWork_4"/>
|
||||||
|
<path start = "GenOrcidAuthorWork_5"/>
|
||||||
|
<path start = "GenOrcidAuthorWork_6"/>
|
||||||
|
<path start = "GenOrcidAuthorWork_7"/>
|
||||||
|
<path start = "GenOrcidAuthorWork_8"/>
|
||||||
|
<path start = "GenOrcidAuthorWork_9"/>
|
||||||
|
<path start = "GenOrcidAuthorWork_X"/>
|
||||||
|
</fork>
|
||||||
|
|
||||||
<join name = "join_node" to = "End"/>
|
<join name = "join_node" to = "End"/>
|
||||||
|
|
||||||
|
<!-- <join name = "join_node" to = "fork_gen_orcid_author_work_2"/>-->
|
||||||
|
|
||||||
|
<!-- <fork name = "fork_gen_orcid_author_work_2">-->
|
||||||
|
<!-- <path start = "GenOrcidAuthorWork_6"/>-->
|
||||||
|
<!-- <path start = "GenOrcidAuthorWork_7"/>-->
|
||||||
|
<!-- <path start = "GenOrcidAuthorWork_8"/>-->
|
||||||
|
<!-- <path start = "GenOrcidAuthorWork_9"/>-->
|
||||||
|
<!-- <path start = "GenOrcidAuthorWork_X"/>-->
|
||||||
|
<!-- </fork>-->
|
||||||
|
|
||||||
|
<!-- <join name = "join_node_2" to = "End"/>-->
|
||||||
|
|
||||||
<end name="End"/>
|
<end name="End"/>
|
||||||
</workflow-app>
|
</workflow-app>
|
|
@ -19,4 +19,8 @@
|
||||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||||
<value>true</value>
|
<value>true</value>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapreduce.map.java.opts</name>
|
||||||
|
<value>-Xmx16g</value>
|
||||||
|
</property>
|
||||||
</configuration>
|
</configuration>
|
|
@ -1,4 +1,4 @@
|
||||||
<workflow-app name="Import Orcid Summaries" xmlns="uri:oozie:workflow:0.5">
|
<workflow-app name="Gen Orcid Authors From Summaries" xmlns="uri:oozie:workflow:0.5">
|
||||||
<parameters>
|
<parameters>
|
||||||
<property>
|
<property>
|
||||||
<name>workingPath</name>
|
<name>workingPath</name>
|
||||||
|
@ -6,7 +6,7 @@
|
||||||
</property>
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>shell_cmd_0</name>
|
<name>shell_cmd_0</name>
|
||||||
<value>wget -O /tmp/ORCID_2019_summaries.tar.gz https://orcid.figshare.com/ndownloader/files/18017633 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_summaries.tar.gz /data/orcid_activities/ORCID_2019_summaries.tar.gz ; rm -f /tmp/ORCID_2019_summaries.tar.gz
|
<value>wget -O /tmp/ORCID_2020_10_summaries.tar.gz https://orcid.figshare.com/ndownloader/files/25032905 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_summaries.tar.gz /data/orcid_activities_2020/ORCID_2020_10_summaries.tar.gz ; rm -f /tmp/ORCID_2020_10_summaries.tar.gz
|
||||||
</value>
|
</value>
|
||||||
<description>the shell command that downloads and puts to hdfs orcid summaries</description>
|
<description>the shell command that downloads and puts to hdfs orcid summaries</description>
|
||||||
</property>
|
</property>
|
||||||
|
@ -21,8 +21,8 @@
|
||||||
|
|
||||||
<action name="ResetWorkingPath">
|
<action name="ResetWorkingPath">
|
||||||
<fs>
|
<fs>
|
||||||
<delete path='${workingPath}/summaries/output'/>
|
<delete path='${workingPath}/authors'/>
|
||||||
<mkdir path='${workingPath}/summaries/output'/>
|
<mkdir path='${workingPath}/authors'/>
|
||||||
</fs>
|
</fs>
|
||||||
<ok to="check_exist_on_hdfs_summaries"/>
|
<ok to="check_exist_on_hdfs_summaries"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -31,7 +31,7 @@
|
||||||
<decision name="check_exist_on_hdfs_summaries">
|
<decision name="check_exist_on_hdfs_summaries">
|
||||||
<switch>
|
<switch>
|
||||||
<case to="ImportOrcidSummaries">
|
<case to="ImportOrcidSummaries">
|
||||||
${fs:exists(concat(workingPath,'/ORCID_2019_summaries.tar.gz'))}
|
${fs:exists(concat(workingPath,'/ORCID_2020_10_summaries.tar.gz'))}
|
||||||
</case>
|
</case>
|
||||||
<default to="DownloadSummaries" />
|
<default to="DownloadSummaries" />
|
||||||
</switch>
|
</switch>
|
||||||
|
@ -57,8 +57,8 @@
|
||||||
<main-class>eu.dnetlib.doiboost.orcid.OrcidDSManager</main-class>
|
<main-class>eu.dnetlib.doiboost.orcid.OrcidDSManager</main-class>
|
||||||
<arg>-w</arg><arg>${workingPath}/</arg>
|
<arg>-w</arg><arg>${workingPath}/</arg>
|
||||||
<arg>-n</arg><arg>${nameNode}</arg>
|
<arg>-n</arg><arg>${nameNode}</arg>
|
||||||
<arg>-f</arg><arg>ORCID_2019_summaries.tar.gz</arg>
|
<arg>-f</arg><arg>ORCID_2020_10_summaries.tar.gz</arg>
|
||||||
<arg>-o</arg><arg>summaries/output/</arg>
|
<arg>-o</arg><arg>authors/</arg>
|
||||||
</java>
|
</java>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
|
|
@ -59,7 +59,7 @@
|
||||||
|
|
||||||
<action name="ResetWorkingPath">
|
<action name="ResetWorkingPath">
|
||||||
<fs>
|
<fs>
|
||||||
<delete path='${workingPath}/no_doi_enriched_works/output'/>
|
<delete path='${workingPath}/no_doi_dataset'/>
|
||||||
</fs>
|
</fs>
|
||||||
<ok to="GenOrcidNoDoiDataset"/>
|
<ok to="GenOrcidNoDoiDataset"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -85,7 +85,7 @@
|
||||||
<arg>-n</arg><arg>${nameNode}</arg>
|
<arg>-n</arg><arg>${nameNode}</arg>
|
||||||
<arg>-f</arg><arg>-</arg>
|
<arg>-f</arg><arg>-</arg>
|
||||||
<arg>-ow</arg><arg>no_doi_works/</arg>
|
<arg>-ow</arg><arg>no_doi_works/</arg>
|
||||||
<arg>-oew</arg><arg>no_doi_enriched_works/output</arg>
|
<arg>-oew</arg><arg>no_doi_dataset</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
|
@ -38,8 +38,8 @@ public class OrcidClientTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void downloadTest() throws Exception {
|
public void downloadTest() throws Exception {
|
||||||
String record = testDownloadRecord("0000-0002-2536-4498");
|
String record = testDownloadRecord("0000-0001-6163-2042");
|
||||||
File f = new File("/tmp/downloaded_0000-0002-2536-4498.xml");
|
File f = new File("/tmp/downloaded_0000-0001-6163-2042.xml");
|
||||||
OutputStream outStream = new FileOutputStream(f);
|
OutputStream outStream = new FileOutputStream(f);
|
||||||
IOUtils.write(record.getBytes(), outStream);
|
IOUtils.write(record.getBytes(), outStream);
|
||||||
System.out.println("saved to tmp");
|
System.out.println("saved to tmp");
|
||||||
|
|
|
@ -2,15 +2,20 @@
|
||||||
package eu.dnetlib.doiboost.orcidnodoi.xml;
|
package eu.dnetlib.doiboost.orcidnodoi.xml;
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.text.Normalizer;
|
import java.text.Normalizer;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
import javax.validation.constraints.AssertTrue;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.commons.text.similarity.JaccardSimilarity;
|
import org.apache.commons.text.similarity.JaccardSimilarity;
|
||||||
import org.apache.commons.text.similarity.JaroWinklerSimilarity;
|
import org.apache.commons.text.similarity.JaroWinklerSimilarity;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.mortbay.log.Log;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
@ -41,7 +46,6 @@ public class OrcidNoDoiTest {
|
||||||
String orcidIdA = "0000-0003-2760-1191";
|
String orcidIdA = "0000-0003-2760-1191";
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
// @Ignore
|
|
||||||
public void readPublicationFieldsTest()
|
public void readPublicationFieldsTest()
|
||||||
throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException {
|
throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException {
|
||||||
logger.info("running loadPublicationFieldsTest ....");
|
logger.info("running loadPublicationFieldsTest ....");
|
||||||
|
@ -95,8 +99,7 @@ public class OrcidNoDoiTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
// @Ignore
|
public void authorMatchTest() throws Exception {
|
||||||
private void authorMatchTest() throws Exception {
|
|
||||||
logger.info("running authorSimpleMatchTest ....");
|
logger.info("running authorSimpleMatchTest ....");
|
||||||
String orcidWork = "activity_work_0000-0003-2760-1191-similarity.xml";
|
String orcidWork = "activity_work_0000-0003-2760-1191-similarity.xml";
|
||||||
AuthorData author = new AuthorData();
|
AuthorData author = new AuthorData();
|
||||||
|
@@ -121,9 +124,60 @@ public class OrcidNoDoiTest {
 			logger.error("parsing xml", e);
 		}
 		assertNotNull(workData);
+
+		Contributor a = workData.getContributors().get(0);
+		assertTrue(a.getCreditName().equals("Abdel-Dayem K"));
+
 		AuthorMatcher.match(author, workData.getContributors());
 		GsonBuilder builder = new GsonBuilder();
 		Gson gson = builder.create();
 		logger.info(gson.toJson(workData));
+
+		assertTrue(workData.getContributors().size() == 6);
+		Contributor c = workData.getContributors().get(0);
+		assertTrue(c.getOid().equals("0000-0003-2760-1191"));
+		assertTrue(c.getName().equals("Khairy"));
+		assertTrue(c.getSurname().equals("Abdel Dayem"));
+		assertTrue(c.getCreditName().equals("Abdel-Dayem K"));
+	}
+
+	@Test
+	public void readContributorsTest()
+		throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException {
+		logger.info("running loadPublicationFieldsTest ....");
+		String xml = IOUtils
+			.toString(
+				OrcidNoDoiTest.class.getResourceAsStream("activity_work_0000-0003-2760-1191_contributors.xml"));
+
+		if (xml == null) {
+			logger.info("Resource not found");
+		}
+		XMLRecordParserNoDoi p = new XMLRecordParserNoDoi();
+		if (p == null) {
+			logger.info("XMLRecordParserNoDoi null");
+		}
+		WorkDataNoDoi workData = null;
+		try {
+			workData = p.VTDParseWorkData(xml.getBytes());
+		} catch (Exception e) {
+			logger.error("parsing xml", e);
+		}
+		assertNotNull(workData.getContributors());
+		assertTrue(workData.getContributors().size() == 5);
+		assertTrue(StringUtils.isBlank(workData.getContributors().get(0).getCreditName()));
+		assertTrue(workData.getContributors().get(0).getSequence().equals("seq0"));
+		assertTrue(workData.getContributors().get(0).getRole().equals("role0"));
+		assertTrue(workData.getContributors().get(1).getCreditName().equals("creditname1"));
+		assertTrue(StringUtils.isBlank(workData.getContributors().get(1).getSequence()));
+		assertTrue(StringUtils.isBlank(workData.getContributors().get(1).getRole()));
+		assertTrue(workData.getContributors().get(2).getCreditName().equals("creditname2"));
+		assertTrue(workData.getContributors().get(2).getSequence().equals("seq2"));
+		assertTrue(StringUtils.isBlank(workData.getContributors().get(2).getRole()));
+		assertTrue(workData.getContributors().get(3).getCreditName().equals("creditname3"));
+		assertTrue(StringUtils.isBlank(workData.getContributors().get(3).getSequence()));
+		assertTrue(workData.getContributors().get(3).getRole().equals("role3"));
+		assertTrue(StringUtils.isBlank(workData.getContributors().get(4).getCreditName()));
+		assertTrue(workData.getContributors().get(4).getSequence().equals("seq4"));
+		assertTrue(workData.getContributors().get(4).getRole().equals("role4"));
 	}
 }
activity_work_0000-0003-2760-1191_contributors.xml
@@ -0,0 +1,101 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<work:work xmlns:address="http://www.orcid.org/ns/address"
	xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
	xmlns:employment="http://www.orcid.org/ns/employment"
	xmlns:education="http://www.orcid.org/ns/education"
	xmlns:other-name="http://www.orcid.org/ns/other-name"
	xmlns:deprecated="http://www.orcid.org/ns/deprecated"
	xmlns:funding="http://www.orcid.org/ns/funding"
	xmlns:research-resource="http://www.orcid.org/ns/research-resource"
	xmlns:service="http://www.orcid.org/ns/service"
	xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
	xmlns:distinction="http://www.orcid.org/ns/distinction"
	xmlns:internal="http://www.orcid.org/ns/internal"
	xmlns:membership="http://www.orcid.org/ns/membership"
	xmlns:person="http://www.orcid.org/ns/person"
	xmlns:personal-details="http://www.orcid.org/ns/personal-details"
	xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
	xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
	xmlns:activities="http://www.orcid.org/ns/activities"
	xmlns:qualification="http://www.orcid.org/ns/qualification"
	xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
	xmlns:error="http://www.orcid.org/ns/error"
	xmlns:preferences="http://www.orcid.org/ns/preferences"
	xmlns:invited-position="http://www.orcid.org/ns/invited-position"
	xmlns:work="http://www.orcid.org/ns/work"
	xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
	path="/0000-0003-2760-1191/work/28776099" visibility="public">
	<common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
	<common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
	<common:source>
		<common:source-orcid>
			<common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
			<common:path>0000-0002-9157-3431</common:path>
			<common:host>orcid.org</common:host>
		</common:source-orcid>
		<common:source-name>Europe PubMed Central</common:source-name>
	</common:source>
	<work:title>
		<common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
			Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
			ST-Segment-Elevation Myocardial Infarction.</common:title>
	</work:title>
	<work:citation>
		<work:citation-type>formatted-unspecified</work:citation-type>
		<work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
			Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
	</work:citation>
	<work:type>journal-article</work:type>
	<common:publication-date>
		<common:year>2016</common:year>
		<common:month>11</common:month>
	</common:publication-date>
	<common:external-ids>
		<common:external-id>
			<common:external-id-type>pmid</common:external-id-type>
			<common:external-id-value>27899851</common:external-id-value>
			<common:external-id-normalized transient="true">27899851</common:external-id-normalized>
			<common:external-id-relationship>self</common:external-id-relationship>
		</common:external-id>
		<common:external-id>
			<common:external-id-type>pmc</common:external-id-type>
			<common:external-id-value>PMC5126442</common:external-id-value>
			<common:external-id-normalized transient="true"
				>PMC5126442</common:external-id-normalized>
			<common:external-id-relationship>self</common:external-id-relationship>
		</common:external-id>
	</common:external-ids>
	<common:url>http://europepmc.org/abstract/med/27899851</common:url>
	<work:contributors>
		<work:contributor>
			<work:contributor-attributes>
				<work:contributor-sequence>seq0</work:contributor-sequence>
				<work:contributor-role>role0</work:contributor-role>
			</work:contributor-attributes>
		</work:contributor>
		<work:contributor>
			<work:credit-name>creditname1</work:credit-name>
		</work:contributor>
		<work:contributor>
			<work:credit-name>creditname2</work:credit-name>
			<work:contributor-attributes>
				<work:contributor-sequence>seq2</work:contributor-sequence>
				<work:contributor-role></work:contributor-role>
			</work:contributor-attributes>
		</work:contributor>
		<work:contributor>
			<work:credit-name>creditname3</work:credit-name>
			<work:contributor-attributes>
				<work:contributor-sequence></work:contributor-sequence>
				<work:contributor-role>role3</work:contributor-role>
			</work:contributor-attributes>
		</work:contributor>
		<work:contributor>
			<work:credit-name></work:credit-name>
			<work:contributor-attributes>
				<work:contributor-sequence>seq4</work:contributor-sequence>
				<work:contributor-role>role4</work:contributor-role>
			</work:contributor-attributes>
		</work:contributor>
	</work:contributors>
</work:work>
pom.xml
@@ -458,6 +458,18 @@
 			<version>${jsonschemagenerator.version}</version>
 		</dependency>

+		<dependency>
+			<groupId>org.apache.commons</groupId>
+			<artifactId>commons-text</artifactId>
+			<version>${common.text.version}</version>
+		</dependency>
+
+		<dependency>
+			<groupId>org.apache.httpcomponents</groupId>
+			<artifactId>httpclient</artifactId>
+			<version>${org.apache.httpcomponents.version}</version>
+		</dependency>
+
 	</dependencies>
 </dependencyManagement>
Dependency versions should be declared only in the main pom file. Please declare this dependency there (v1.8) and reference it here without overriding the version.
Please remove the dependency version from here entirely. The version has to be declared only in the main pom file, just like all the other dependencies in this pom file.
I still see the dependency version declared here. Please move it into the project's main pom, under the dependencyManagement section.
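For reference, a minimal sketch of the convention the reviewer is asking for: the version is managed once in the root pom's dependencyManagement section, and module poms list the dependency without a version element. The properties block and its 1.8 value below follow the reviewer's "(v1.8)" remark and are shown only as an illustration; where exactly the property is defined in this project is an assumption.

	<!-- Root pom.xml (illustrative sketch): the version is managed in one place. -->
	<properties>
		<common.text.version>1.8</common.text.version>
	</properties>

	<dependencyManagement>
		<dependencies>
			<dependency>
				<groupId>org.apache.commons</groupId>
				<artifactId>commons-text</artifactId>
				<version>${common.text.version}</version>
			</dependency>
		</dependencies>
	</dependencyManagement>

	<!-- Module pom (illustrative sketch): no <version> element; Maven resolves the
	     version from the parent's dependencyManagement. -->
	<dependencies>
		<dependency>
			<groupId>org.apache.commons</groupId>
			<artifactId>commons-text</artifactId>
		</dependency>
	</dependencies>

With this layout, bumping commons-text is a single edit in the root pom and every module picks up the new version.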