ORCID Enrichment and Download #364

Merged
claudio.atzori merged 14 commits from orcid_import into beta 2023-12-01 15:05:45 +01:00
43 changed files with 3251 additions and 229 deletions

View File

@ -4,194 +4,318 @@ package eu.dnetlib.dhp.oa.merge;
import java.text.Normalizer;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import com.wcohen.ss.JaroWinkler;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.pace.model.Person;
import scala.Tuple2;
public class AuthorMerger {
private static final Double THRESHOLD = 0.95;
private static final Double THRESHOLD = 0.95;
private AuthorMerger() {
}
private AuthorMerger() {
}
public static List<Author> merge(List<List<Author>> authors) {
public static List<Author> merge(List<List<Author>> authors) {
authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)));
authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)));
List<Author> author = new ArrayList<>();
List<Author> author = new ArrayList<>();
for (List<Author> a : authors) {
author = mergeAuthor(author, a);
}
for (List<Author> a : authors) {
author = mergeAuthor(author, a);
}
return author;
return author;
}
}
public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b, Double threshold) {
int pa = countAuthorsPids(a);
int pb = countAuthorsPids(b);
List<Author> base;
List<Author> enrich;
int sa = authorsSize(a);
int sb = authorsSize(b);
public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b, Double threshold) {
int pa = countAuthorsPids(a);
int pb = countAuthorsPids(b);
List<Author> base;
List<Author> enrich;
int sa = authorsSize(a);
int sb = authorsSize(b);
if (sa == sb) {
base = pa > pb ? a : b;
enrich = pa > pb ? b : a;
} else {
base = sa > sb ? a : b;
enrich = sa > sb ? b : a;
}
enrichPidFromList(base, enrich, threshold);
return base;
}
if (sa == sb) {
base = pa > pb ? a : b;
enrich = pa > pb ? b : a;
} else {
base = sa > sb ? a : b;
enrich = sa > sb ? b : a;
}
enrichPidFromList(base, enrich, threshold);
return base;
}
public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b) {
return mergeAuthor(a, b, THRESHOLD);
}
public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b) {
return mergeAuthor(a, b, THRESHOLD);
}
private static void enrichPidFromList(List<Author> base, List<Author> enrich, Double threshold) {
if (base == null || enrich == null)
return;
private static void enrichPidFromList(List<Author> base, List<Author> enrich, Double threshold) {
if (base == null || enrich == null)
return;
// <pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list)
final Map<String, Author> basePidAuthorMap = base
.stream()
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.flatMap(
a -> a
.getPid()
.stream()
.filter(Objects::nonNull)
.map(p -> new Tuple2<>(pidToComparableString(p), a)))
.collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1));
// <pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list)
final Map<String, Author> basePidAuthorMap = base
.stream()
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.flatMap(
a -> a
.getPid()
.stream()
.filter(Objects::nonNull)
.map(p -> new Tuple2<>(pidToComparableString(p), a)))
.collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1));
// <pid, Author> (list of pid that are missing in the other list)
final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
.stream()
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.flatMap(
a -> a
.getPid()
.stream()
.filter(Objects::nonNull)
.filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p)))
.map(p -> new Tuple2<>(p, a)))
.collect(Collectors.toList());
// <pid, Author> (list of pid that are missing in the other list)
final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
.stream()
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.flatMap(
a -> a
.getPid()
.stream()
.filter(Objects::nonNull)
.filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p)))
.map(p -> new Tuple2<>(p, a)))
.collect(Collectors.toList());
pidToEnrich
.forEach(
a -> {
Optional<Tuple2<Double, Author>> simAuthor = base
.stream()
.map(ba -> new Tuple2<>(sim(ba, a._2()), ba))
.max(Comparator.comparing(Tuple2::_1));
pidToEnrich
.forEach(
a -> {
Optional<Tuple2<Double, Author>> simAuthor = base
.stream()
.map(ba -> new Tuple2<>(sim(ba, a._2()), ba))
.max(Comparator.comparing(Tuple2::_1));
if (simAuthor.isPresent()) {
double th = threshold;
// increase the threshold if the surname is too short
if (simAuthor.get()._2().getSurname() != null
&& simAuthor.get()._2().getSurname().length() <= 3 && threshold > 0.0)
th = 0.99;
if (simAuthor.isPresent()) {
double th = threshold;
// increase the threshold if the surname is too short
if (simAuthor.get()._2().getSurname() != null
&& simAuthor.get()._2().getSurname().length() <= 3 && threshold > 0.0)
th = 0.99;
if (simAuthor.get()._1() > th) {
Author r = simAuthor.get()._2();
if (r.getPid() == null) {
r.setPid(new ArrayList<>());
}
if (simAuthor.get()._1() > th) {
Author r = simAuthor.get()._2();
if (r.getPid() == null) {
r.setPid(new ArrayList<>());
}
// TERRIBLE HACK but for some reason when we create and Array with Arrays.asList,
// it creates of fixed size, and the add method raise UnsupportedOperationException at
// java.util.AbstractList.add
final List<StructuredProperty> tmp = new ArrayList<>(r.getPid());
tmp.add(a._1());
r.setPid(tmp);
}
}
});
}
// TERRIBLE HACK but for some reason when we create and Array with Arrays.asList,
// it creates of fixed size, and the add method raise UnsupportedOperationException at
// java.util.AbstractList.add
final List<StructuredProperty> tmp = new ArrayList<>(r.getPid());
tmp.add(a._1());
r.setPid(tmp);
}
}
});
}
public static String pidToComparableString(StructuredProperty pid) {
final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase()
: "";
return (pid.getQualifier() != null ? classid : "")
+ (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
}
public static String normalizeFullName(final String fullname) {
return nfd(fullname)
.toLowerCase()
// do not compact the regexes in a single expression, would cause StackOverflowError
// in case
// of large input strings
.replaceAll("(\\W)+", " ")
.replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")
.replaceAll("(\\p{Punct})+", " ")
.replaceAll("(\\d)+", " ")
.replaceAll("(\\n)+", " ")
public static int countAuthorsPids(List<Author> authors) {
if (authors == null)
return 0;
.trim();
}
return (int) authors.stream().filter(AuthorMerger::hasPid).count();
}
private static int authorsSize(List<Author> authors) {
if (authors == null)
return 0;
return authors.size();
}
private static String authorFieldToBeCompared(Author author) {
if (StringUtils.isNotBlank(author.getSurname())) {

Out of context, this seems to penalize strings of different lengths too heavily.
What about using the shorter string as the reference for the while loop, pre-filling count with the length difference, and then adding the char-by-char comparison difference?
That would be more permissive with strings that share the very same prefix.

Out of context, this seems to penalize strings of different lengths too heavily. What about using the shorter string as the reference for the while loop, pre-filling count with the length difference, and then adding the char-by-char comparison difference? That would be more permissive with strings that share the very same prefix.
Review

The hammingDist function is never used; it was a test of a previous comparison function, so I deleted it.

The hammingDist function is never used; it was a test of a previous comparison function, so I deleted it.
return author.getSurname();
private static Double sim(Author a, Author b) {
}
if (StringUtils.isNotBlank(author.getFullname())) {
return author.getFullname();
}
return null;
}
final Person pa = parse(a);
final Person pb = parse(b);
/**
* This method tries to determine whether two authors are the same in the context
* of ORCID enrichment
*
* @param left Author in the OAF entity
* @param right Author ORCID
* @return based on a heuristic on the names of the authors if they are the same.
*/
public static boolean checkORCIDSimilarity(final Author left, final Author right) {
final Person pl = parse(left);
final Person pr = parse(right);
// if both are accurate (e.g. they have name and surname)
if (pa.isAccurate() & pb.isAccurate()) {
return new JaroWinkler().score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString())) * 0.5
+ new JaroWinkler().score(normalize(pa.getNameString()), normalize(pb.getNameString())) * 0.5;
} else {
return new JaroWinkler()
.score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname()));
}
}
// If one of them didn't have a surname we verify if they have the fullName not empty
// and verify if the normalized version is equal
if (!(pl.getSurname() != null && pl.getSurname().stream().anyMatch(StringUtils::isNotBlank) &&
pr.getSurname() != null && pr.getSurname().stream().anyMatch(StringUtils::isNotBlank))) {
private static boolean hasPid(Author a) {
if (a == null || a.getPid() == null || a.getPid().isEmpty())
return false;
return a.getPid().stream().anyMatch(p -> p != null && StringUtils.isNotBlank(p.getValue()));
}
if (pl.getFullname() != null && !pl.getFullname().isEmpty() && pr.getFullname() != null
&& !pr.getFullname().isEmpty()) {
return pl
.getFullname()
.stream()
.anyMatch(
fl -> pr.getFullname().stream().anyMatch(fr -> normalize(fl).equalsIgnoreCase(normalize(fr))));
} else {
return false;
}
}
// The Authors have one surname in common
if (pl.getSurname().stream().anyMatch(sl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(sl)))) {
private static Person parse(Author author) {
if (StringUtils.isNotBlank(author.getSurname())) {
return new Person(author.getSurname() + ", " + author.getName(), false);
} else {
if (StringUtils.isNotBlank(author.getFullname()))
return new Person(author.getFullname(), false);
else
return new Person("", false);
}
}
// If one of them has only a surname and is the same we can say that they are the same author
if ((pl.getName() == null || pl.getName().stream().allMatch(StringUtils::isBlank)) ||
(pr.getName() == null || pr.getName().stream().allMatch(StringUtils::isBlank)))
return true;
// The authors have the same initials of Name in common
if (pl
.getName()
.stream()
.anyMatch(
nl -> pr
.getName()
.stream()
.anyMatch(nr -> nr.equalsIgnoreCase(nl))))
return true;
}
private static String normalize(final String s) {
String[] normalized = nfd(s)
.toLowerCase()
// do not compact the regexes in a single expression, would cause StackOverflowError
// in case
// of large input strings
.replaceAll("(\\W)+", " ")
.replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")
.replaceAll("(\\p{Punct})+", " ")
.replaceAll("(\\d)+", " ")
.replaceAll("(\\n)+", " ")
.trim()
.split(" ");
// Sometimes we noticed that publication have author wrote in inverse order Surname, Name
// We verify if we have an exact match between name and surname
if (pl.getSurname().stream().anyMatch(sl -> pr.getName().stream().anyMatch(nr -> nr.equalsIgnoreCase(sl))) &&
pl.getName().stream().anyMatch(nl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(nl))))
return true;
else
return false;
}
//
Arrays.sort(normalized);
/**
* Method to enrich ORCID information in one list of authors based on another list
*
* @param baseAuthor the Author List in the OAF Entity
* @param orcidAuthor The list of ORCID Author intersected
* @return The Author List of the OAF Entity enriched with the orcid Author
*/
public static List<Author> enrichOrcid(List<Author> baseAuthor, List<Author> orcidAuthor) {
return String.join(" ", normalized);
}
if (baseAuthor == null || baseAuthor.isEmpty())
return orcidAuthor;
private static String nfd(final String s) {
return Normalizer.normalize(s, Normalizer.Form.NFD);
}
if (orcidAuthor == null || orcidAuthor.isEmpty())
return baseAuthor;
if (baseAuthor.size() == 1 && orcidAuthor.size() > 10)
return baseAuthor;
final List<Author> oAuthor = new ArrayList<>();
oAuthor.addAll(orcidAuthor);
baseAuthor.forEach(ba -> {
Optional<Author> aMatch = oAuthor.stream().filter(oa -> checkORCIDSimilarity(ba, oa)).findFirst();
if (aMatch.isPresent()) {
final Author sameAuthor = aMatch.get();
addPid(ba, sameAuthor.getPid());
oAuthor.remove(sameAuthor);
}
});
return baseAuthor;
}
/**
 * Appends the given pids to the pid list of an author.
 *
 * The author's current pids are copied into a fresh {@link ArrayList} before appending, because the
 * list returned by {@code getPid()} may be fixed-size (e.g. built with {@code Arrays.asList}), in which
 * case {@code addAll} would throw {@link UnsupportedOperationException}.
 *
 * @param a    the author to enrich; its pid list is replaced with the merged list
 * @param pids the pids to append; {@code null} is treated as an empty list
 */
private static void addPid(final Author a, final List<StructuredProperty> pids) {
	final List<StructuredProperty> merged = new ArrayList<>();
	if (a.getPid() != null) {
		merged.addAll(a.getPid());
	}
	if (pids != null) {
		merged.addAll(pids);
	}
	a.setPid(merged);
}
/**
 * Builds a case-insensitive key for a pid by concatenating its lower-cased qualifier classid and
 * its lower-cased value.
 *
 * A missing qualifier, classid or value contributes an empty string. The qualifier is checked for
 * null before it is dereferenced, so a pid without qualifier no longer raises a NullPointerException.
 *
 * @param pid the pid to convert (must not be null)
 * @return the comparable string, never null
 */
public static String pidToComparableString(StructuredProperty pid) {
	final String classid = pid.getQualifier() != null && pid.getQualifier().getClassid() != null
		? pid.getQualifier().getClassid().toLowerCase()
		: "";
	final String value = pid.getValue() != null ? pid.getValue().toLowerCase() : "";
	return classid + value;
}
/**
 * Counts how many authors in the list carry at least one usable pid, as decided by {@code hasPid}.
 *
 * @param authors the authors to inspect, may be null
 * @return the number of authors with a pid, 0 for a null list
 */
public static int countAuthorsPids(List<Author> authors) {
	int withPid = 0;
	if (authors != null) {
		for (final Author candidate : authors) {
			if (hasPid(candidate)) {
				withPid++;
			}
		}
	}
	return withPid;
}
/**
 * Null-safe size of an author list.
 *
 * @param authors the list, may be null
 * @return the number of elements, 0 when the list is null
 */
private static int authorsSize(List<Author> authors) {
	return authors == null ? 0 : authors.size();
}
/**
 * Computes a Jaro-Winkler based similarity between two authors.
 *
 * When both parsed persons report themselves as accurate, the score is the average of the surname
 * similarity and the name similarity; otherwise the normalised full names are compared as a whole.
 *
 * @param a first author
 * @param b second author
 * @return the similarity score
 */
private static Double sim(Author a, Author b) {
	final Person first = parse(a);
	final Person second = parse(b);

	// non short-circuit AND kept on purpose: both isAccurate() calls are always evaluated
	final boolean bothAccurate = first.isAccurate() & second.isAccurate();

	if (!bothAccurate) {
		return new JaroWinkler()
			.score(normalize(first.getNormalisedFullname()), normalize(second.getNormalisedFullname()));
	}

	final double surnameScore = new JaroWinkler()
		.score(normalize(first.getSurnameString()), normalize(second.getSurnameString()));
	final double nameScore = new JaroWinkler()
		.score(normalize(first.getNameString()), normalize(second.getNameString()));
	return surnameScore * 0.5 + nameScore * 0.5;
}
/**
 * Tells whether an author has at least one non-null pid whose value is not blank.
 *
 * @param a the author, may be null
 * @return true if a pid with a non-blank value exists, false otherwise
 */
private static boolean hasPid(Author a) {
	if (a == null) {
		return false;
	}
	final List<StructuredProperty> pids = a.getPid();
	if (pids == null || pids.isEmpty()) {
		return false;
	}
	for (final StructuredProperty pid : pids) {
		if (pid != null && StringUtils.isNotBlank(pid.getValue())) {
			return true;
		}
	}
	return false;
}
/**
 * Builds a {@link Person} from an author.
 *
 * If the surname is available the person is built from the string "surname, name"; otherwise the
 * full name is used, and an empty string is the last fallback.
 *
 * A blank or null name is rendered as an empty string: plain string concatenation would otherwise
 * inject the literal text "null" as the given name.
 * NOTE(review): this assumes Person treats "Surname, " as a surname without a name - confirm.
 *
 * @param author the author to parse (must not be null)
 * @return the parsed person, never null
 */
private static Person parse(Author author) {
	if (StringUtils.isNotBlank(author.getSurname())) {
		final String givenName = StringUtils.isNotBlank(author.getName()) ? author.getName() : "";
		return new Person(author.getSurname() + ", " + givenName, false);
	}
	if (StringUtils.isNotBlank(author.getFullname())) {
		return new Person(author.getFullname(), false);
	}
	return new Person("", false);
}
/**
 * Produces an order-independent, lower-cased form of a name.
 *
 * The input is NFD-decomposed and lower-cased; runs of non-word characters, combining diacritical
 * marks, punctuation, digits and newlines are each replaced by a single space. The trimmed result is
 * split on single spaces, the tokens are sorted in natural order and joined back with a space.
 *
 * @param s the string to normalize (must not be null)
 * @return the normalized string
 */
public static String normalize(final String s) {
	// the replacements are deliberately kept as separate passes: compacting the regexes into a
	// single expression would cause StackOverflowError on large input strings
	final String cleaned = nfd(s)
		.toLowerCase()
		.replaceAll("(\\W)+", " ")
		.replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")
		.replaceAll("(\\p{Punct})+", " ")
		.replaceAll("(\\d)+", " ")
		.replaceAll("(\\n)+", " ")
		.trim();

	return Arrays
		.stream(cleaned.split(" "))
		.sorted()
		.collect(Collectors.joining(" "));
}

/**
 * Applies Unicode canonical decomposition (NFD) to the given string.
 *
 * @param s the string to decompose
 * @return the decomposed string
 */
private static String nfd(final String s) {
	final Normalizer.Form decomposition = Normalizer.Form.NFD;
	return Normalizer.normalize(s, decomposition);
}
}

View File

@ -0,0 +1,114 @@
package eu.dnetlib.oa.merge;
import static org.junit.jupiter.api.Assertions.*;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.List;
import java.util.Objects;
import org.junit.jupiter.api.Test;
import org.junit.platform.commons.util.StringUtils;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.merge.AuthorMerger;
import eu.dnetlib.dhp.schema.oaf.Author;
/**
 * Tests for the ORCID enrichment helpers of {@link AuthorMerger}.
 */
public class AuthorMergerTest {

	/** Type token used to deserialize a JSON array of authors. */
	private static final TypeReference<List<Author>> AUTHOR_LIST = new TypeReference<List<Author>>() {
	};

	/** Opens a classpath resource as a UTF-8 reader, failing fast if the resource is missing. */
	private static BufferedReader openResource(final String path) {
		return new BufferedReader(new InputStreamReader(
			Objects.requireNonNull(AuthorMergerTest.class.getResourceAsStream(path), path),
			java.nio.charset.StandardCharsets.UTF_8));
	}

	/** Counts the authors having at least one pid whose qualifier classid contains "orcid". */
	private static long countAuthorsWithOrcid(final List<Author> authors) {
		return authors
			.stream()
			.filter(
				a -> a.getPid() != null && a
					.getPid()
					.stream()
					.anyMatch(
						p -> p != null && p.getQualifier() != null
							&& p.getQualifier().getClassid() != null
							&& p.getQualifier().getClassid().toLowerCase().contains("orcid")))
			.count();
	}

	/**
	 * Runs the enrichment over the sample files and prints some statistics.
	 *
	 * As read below, each record takes two lines of the publication file (id, authors JSON) and three
	 * lines of the ORCID file (matched pid, ORCID id, authors JSON); the third ORCID line is only
	 * consumed when the publication authors line is not blank.
	 */
	@Test
	public void testEnrcichAuthor() throws Exception {
		final ObjectMapper mapper = new ObjectMapper();

		// try-with-resources so that both readers are closed even when an assertion or parsing fails
		try (BufferedReader pr = openResource("/eu/dnetlib/dhp/oa/merge/authors_publication_sample.json");
			BufferedReader or = openResource("/eu/dnetlib/dhp/oa/merge/authors_orcid_sample.json")) {

			String pubLine;
			while ((pubLine = pr.readLine()) != null) {
				final String pubId = pubLine;
				final String MatchPidOrcid = or.readLine();
				final String pubOrcid = or.readLine();
				final String data = pr.readLine();

				if (StringUtils.isNotBlank(data)) {
					List<Author> publicationAuthors = mapper.readValue(data, AUTHOR_LIST);
					List<Author> orcidAuthors = mapper.readValue(or.readLine(), AUTHOR_LIST);
					System.out.printf("OAF ID = %s \n", pubId);
					System.out.printf("ORCID Intersected ID = %s \n", pubOrcid);
					System.out.printf("OAF Author Size = %d \n", publicationAuthors.size());
					System.out.printf("Oricd Author Size = %d \n", orcidAuthors.size());
					System.out.printf("Oricd Matched PID = %s \n", MatchPidOrcid);

					long originalAuthorWithPiD = countAuthorsWithOrcid(publicationAuthors);

					long start = System.currentTimeMillis();
					final List<Author> enrichedList = AuthorMerger.enrichOrcid(publicationAuthors, orcidAuthors);
					long enrichedAuthorWithPid = countAuthorsWithOrcid(enrichedList);
					long totalTime = (System.currentTimeMillis() - start) / 1000;

					System.out
						.printf(
							"Enriched authors in %d seconds from %d pid to %d pid \n", totalTime, originalAuthorWithPiD,
							enrichedAuthorWithPid);
					System.out.println("=================");

					// enrichment only adds pids, so the number of authors with an ORCID can never decrease
					assertTrue(
						enrichedAuthorWithPid >= originalAuthorWithPiD,
						"enrichment must not reduce the number of authors with an ORCID for " + pubId);
				}
			}
		}
	}

	/** Two authors whose name and surname are swapped must be recognised as the same person. */
	@Test
	public void checkSimilarityTest() {
		final Author left = new Author();
		left.setName("Anand");
		left.setSurname("Rachna");
		left.setFullname("Anand, Rachna");

		System.out.println(AuthorMerger.normalizeFullName(left.getFullname()));

		final Author right = new Author();
		right.setName("Rachna");
		right.setSurname("Anand");
		right.setFullname("Rachna, Anand");
		// System.out.println(AuthorMerger.normalize(right.getFullname()));
		boolean same = AuthorMerger.checkORCIDSimilarity(left, right);

		assertTrue(same);
	}
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,102 @@
package eu.dnetlib.dhp.collection.orcid;
import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.Objects;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
/**
 * Downloads into a target directory of the configured file system every file listed by a remote
 * JSON index.
 *
 * The index is fetched from {@code apiURL} and must expose a "files" array whose items carry a
 * "name" and a "download_url" field.
 */
public class DownloadORCIDDumpApplication {
	private static final Logger log = LoggerFactory.getLogger(DownloadORCIDDumpApplication.class);

	/** Timeout applied to connection set-up and socket reads, in milliseconds. */
	private static final int TIMEOUT_MILLIS = 60 * 1000;

	private final FileSystem fileSystem;

	public DownloadORCIDDumpApplication(FileSystem fileSystem) {
		this.fileSystem = fileSystem;
	}

	/**
	 * Entry point: reads "namenode", "targetPath" and "apiURL" from the arguments and runs the download.
	 */
	public static void main(String[] args) throws Exception {
		final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(
			IOUtils
				.toString(
					Objects
						.requireNonNull(
							DownloadORCIDDumpApplication.class
								.getResourceAsStream(
									"/eu/dnetlib/dhp/collection/orcid/download_orcid_parameter.json")),
					java.nio.charset.StandardCharsets.UTF_8));
		argumentParser.parseArgument(args);

		final String hdfsuri = argumentParser.get("namenode");
		log.info("hdfsURI is {}", hdfsuri);

		final String targetPath = argumentParser.get("targetPath");
		log.info("targetPath is {}", targetPath);

		final String apiURL = argumentParser.get("apiURL");
		log.info("apiURL is {}", apiURL);

		final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(hdfsuri));

		new DownloadORCIDDumpApplication(fileSystem).run(targetPath, apiURL);
	}

	/**
	 * Downloads a single item into {@code basePath/name}, overwriting an existing file.
	 *
	 * The target file is opened only after a successful (2xx/3xx) response, so a failed request no
	 * longer leaves an empty or truncated file behind; the failure is logged and the item is skipped.
	 * Both the response body and the output stream are closed, which is what guarantees that the
	 * written bytes are flushed to the file system.
	 *
	 * @param name     file name to create under basePath
	 * @param itemURL  URL to download
	 * @param basePath target directory
	 * @throws RuntimeException wrapping any failure raised while downloading or writing
	 */
	private void downloadItem(final String name, final String itemURL, final String basePath) {
		final Path hdfsWritePath = new Path(String.format("%s/%s", basePath, name));
		final HttpGet request = new HttpGet(itemURL);
		final RequestConfig config = RequestConfig
			.custom()
			.setConnectTimeout(TIMEOUT_MILLIS)
			.setConnectionRequestTimeout(TIMEOUT_MILLIS)
			.setSocketTimeout(TIMEOUT_MILLIS)
			.build();
		log.info("Downloading url {} into {}", itemURL, hdfsWritePath.getName());

		try (CloseableHttpClient client = HttpClientBuilder.create().setDefaultRequestConfig(config).build();
			CloseableHttpResponse response = client.execute(request)) {
			final int responseCode = response.getStatusLine().getStatusCode();
			log.info("Response code is {}", responseCode);

			if (responseCode < 200 || responseCode >= 400 || response.getEntity() == null) {
				log.error("Skipping {}: unexpected response {} from {}", name, responseCode, itemURL);
				return;
			}

			try (InputStream body = response.getEntity().getContent();
				FSDataOutputStream out = fileSystem.create(hdfsWritePath, true)) {
				IOUtils.copy(body, out);
			}
		} catch (Exception e) {
			throw new RuntimeException(e);
		}
	}

	/**
	 * Fetches the JSON index from {@code apiURL} and downloads every listed file into {@code targetPath}.
	 *
	 * @param targetPath directory receiving the downloaded files
	 * @param apiURL     URL of the JSON index
	 * @throws IllegalStateException if the index has no "files" node
	 * @throws Exception             on any I/O or parsing failure
	 */
	protected void run(final String targetPath, final String apiURL) throws Exception {
		final ObjectMapper mapper = new ObjectMapper();
		final URLConnection conn = new URL(apiURL).openConnection();
		conn.setConnectTimeout(TIMEOUT_MILLIS);
		conn.setReadTimeout(TIMEOUT_MILLIS);

		final JsonNode jsonNode;
		try (InputStream is = conn.getInputStream()) {
			// Jackson detects the JSON encoding itself, no platform-dependent String conversion needed
			jsonNode = mapper.readTree(is);
		}

		final JsonNode files = jsonNode == null ? null : jsonNode.get("files");
		if (files == null) {
			throw new IllegalStateException("No 'files' node found in the response of " + apiURL);
		}

		files.forEach(i -> downloadItem(i.get("name").asText(), i.get("download_url").asText(), targetPath));
	}
}

View File

@ -0,0 +1,71 @@
package eu.dnetlib.dhp.collection.orcid;
import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
/**
 * Extracts every "tar.gz" archive found in a source directory by running one {@link ORCIDExtractor}
 * thread per archive.
 */
public class ExtractORCIDDump {
	private static final Logger log = LoggerFactory.getLogger(ExtractORCIDDump.class);

	private final FileSystem fileSystem;

	public ExtractORCIDDump(FileSystem fileSystem) {
		this.fileSystem = fileSystem;
	}

	/**
	 * Entry point: reads "namenode", "sourcePath" and "targetPath" from the arguments and runs the
	 * extraction.
	 */
	public static void main(String[] args) throws Exception {
		final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(
			IOUtils
				.toString(
					Objects
						.requireNonNull(
							ExtractORCIDDump.class
								.getResourceAsStream(
									"/eu/dnetlib/dhp/collection/orcid/extract_orcid_parameter.json")),
					java.nio.charset.StandardCharsets.UTF_8));
		argumentParser.parseArgument(args);

		final String hdfsuri = argumentParser.get("namenode");
		log.info("hdfsURI is {}", hdfsuri);

		final String sourcePath = argumentParser.get("sourcePath");
		log.info("sourcePath is {}", sourcePath);

		final String targetPath = argumentParser.get("targetPath");
		log.info("targetPath is {}", targetPath);

		final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(hdfsuri));

		new ExtractORCIDDump(fileSystem).run(sourcePath, targetPath);
	}

	/**
	 * Starts one extractor thread for each "tar.gz" file directly under {@code sourcePath} and waits
	 * for all of them.
	 *
	 * An exception escaping a worker thread would otherwise only terminate that thread, letting this
	 * method return as if the extraction had succeeded. Failures are therefore collected through an
	 * uncaught-exception handler and reported once every worker has finished.
	 *
	 * @param sourcePath directory containing the archives
	 * @param targetPath base output path handed to each extractor
	 * @throws IOException          if the listing fails or at least one worker thread failed
	 * @throws InterruptedException if interrupted while waiting for the workers
	 */
	public void run(final String sourcePath, final String targetPath) throws IOException, InterruptedException {
		final RemoteIterator<LocatedFileStatus> ls = fileSystem.listFiles(new Path(sourcePath), false);
		final List<ORCIDExtractor> workers = new ArrayList<>();
		final List<Throwable> failures = java.util.Collections.synchronizedList(new ArrayList<>());

		int i = 0;
		while (ls.hasNext()) {
			final LocatedFileStatus current = ls.next();
			if (current.getPath().getName().endsWith("tar.gz")) {
				final ORCIDExtractor worker = new ORCIDExtractor(fileSystem, "" + i++, current.getPath(), targetPath);
				worker.setUncaughtExceptionHandler((thread, error) -> {
					log.error("Extraction thread {} failed", thread.getName(), error);
					failures.add(error);
				});
				workers.add(worker);
			}
		}

		workers.forEach(Thread::start);
		for (ORCIDExtractor worker : workers) {
			worker.join();
		}

		if (!failures.isEmpty()) {
			throw new IOException(
				failures.size() + " of " + workers.size() + " ORCID extraction threads failed", failures.get(0));
		}
	}
}

View File

@ -0,0 +1,171 @@
package eu.dnetlib.dhp.collection.orcid;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* The ORCIDExtractor class extracts ORCID data from a TAR archive.
* The class creates a map of SequenceFile.Writer objects, one for each type of data that is to be extracted (e.g., employments, works, summaries).
* Then, it iterates over the TAR archive and writes each entry to the appropriate SequenceFile.Writer object.
* Finally, it closes all the SequenceFile.Writer objects.
*/
public class ORCIDExtractor extends Thread {
private static final Logger log = LoggerFactory.getLogger(ORCIDExtractor.class);
private final FileSystem fileSystem;
private final String id;
private final Path sourcePath;
private final String baseOutputPath;
public ORCIDExtractor(FileSystem fileSystem, String id, Path sourcePath, String baseOutputPath) {
this.fileSystem = fileSystem;
this.id = id;
this.sourcePath = sourcePath;
this.baseOutputPath = baseOutputPath;
}
/**
* creates a map of SequenceFile.Writer objects,
* one for each type of data that is to be extracted. The map is created based on the filename in the TAR archive.
* For example, if the filename is employments.json, the map will contain an entry for the SequenceFile.Writer
* object that writes employment data.
* @return the Map
*/
private Map<String, SequenceFile.Writer> createMap() {
try {
log.info("Thread {} Creating sequence files starting from this input Path {}", id, sourcePath.getName());
Map<String, SequenceFile.Writer> res = new HashMap<>();
if (sourcePath.getName().contains("summaries")) {
final String summaryPath = String.format("%s/summaries_%s", baseOutputPath, id);
final SequenceFile.Writer summary_file = SequenceFile
.createWriter(
fileSystem.getConf(),
SequenceFile.Writer.file(new Path(summaryPath)),
SequenceFile.Writer.keyClass(Text.class),
SequenceFile.Writer.valueClass(Text.class));
log.info("Thread {} Creating only summary path here {}", id, summaryPath);
res.put("summary", summary_file);
return res;
} else {
String employmentsPath = String.format("%s/employments_%s", baseOutputPath, id);
final SequenceFile.Writer employments_file = SequenceFile
.createWriter(
fileSystem.getConf(),
SequenceFile.Writer.file(new Path(employmentsPath)),
SequenceFile.Writer.keyClass(Text.class),
SequenceFile.Writer.valueClass(Text.class));
res.put("employments", employments_file);
log.info("Thread {} Creating employments path here {}", id, employmentsPath);
final String worksPath = String.format("%s/works_%s", baseOutputPath, id);
final SequenceFile.Writer works_file = SequenceFile
.createWriter(
fileSystem.getConf(),
SequenceFile.Writer.file(new Path(worksPath)),
SequenceFile.Writer.keyClass(Text.class),
SequenceFile.Writer.valueClass(Text.class));
res.put("works", works_file);
log.info("Thread {} Creating works path here {}", id, worksPath);
return res;
}
} catch (Throwable e) {
throw new RuntimeException(e);
}
}
@Override
public void run() {
CompressionCodecFactory factory = new CompressionCodecFactory(fileSystem.getConf());
CompressionCodec codec = factory.getCodec(sourcePath);
if (codec == null) {
System.err.println("No codec found for " + sourcePath.getName());
System.exit(1);
}
InputStream gzipInputStream = null;
try {
gzipInputStream = codec.createInputStream(fileSystem.open(sourcePath));
final Map<String, SequenceFile.Writer> fileMap = createMap();
iterateTar(fileMap, gzipInputStream);
} catch (IOException e) {
throw new RuntimeException(e);
} finally {
log.info("Closing gzip stream");
IOUtils.closeStream(gzipInputStream);
}
}
private SequenceFile.Writer retrieveFile(Map<String, SequenceFile.Writer> fileMap, final String path) {
if (sourcePath.getName().contains("summaries")) {
return fileMap.get("summary");
}
if (path.contains("works")) {
return fileMap.get("works");
}
if (path.contains("employments"))
return fileMap.get("employments");
return null;
}
private void iterateTar(Map<String, SequenceFile.Writer> fileMap, InputStream gzipInputStream) throws IOException {
int extractedItem = 0;
try (final TarArchiveInputStream tais = new TarArchiveInputStream(gzipInputStream)) {
TarArchiveEntry entry;
while ((entry = tais.getNextTarEntry()) != null) {
if (entry.isFile()) {
final SequenceFile.Writer fl = retrieveFile(fileMap, entry.getName());
if (fl != null) {
final Text key = new Text(entry.getName());
final Text value = new Text(
org.apache.commons.io.IOUtils.toString(new BufferedReader(new InputStreamReader(tais))));
fl.append(key, value);
extractedItem++;
if (extractedItem % 100000 == 0) {
log.info("Thread {}: Extracted {} items", id, extractedItem);
break;
}
}
}
}
} finally {
for (SequenceFile.Writer k : fileMap.values()) {
log.info("Thread {}: Completed processed {} items", id, extractedItem);
k.hflush();
k.close();
}
}
}
}

View File

@ -0,0 +1,251 @@
package eu.dnetlib.dhp.collection.orcid;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.ximpleware.*;
import eu.dnetlib.dhp.collection.orcid.model.*;
import eu.dnetlib.dhp.parser.utility.VtdException;
import eu.dnetlib.dhp.parser.utility.VtdUtilityParser;
public class OrcidParser {
// Logger of this parser (instance field).
final Logger log = LoggerFactory.getLogger(OrcidParser.class);
// Navigator over the document parsed last; set by generateParsedDocument.
private VTDNav vn;
// XPath evaluator bound to vn; set by generateParsedDocument.
private AutoPilot ap;
// Namespace prefix / URI pairs. generateParsedDocument registers them on the AutoPilot so that the
// XPath expressions can use the prefixes (the "bulk" and "error"... pairs are declared below; as far
// as this part of the file shows, "bulk" is the only one not registered there).
private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common";
private static final String NS_COMMON = "common";
private static final String NS_PERSON_URL = "http://www.orcid.org/ns/person";
private static final String NS_PERSON = "person";
private static final String NS_DETAILS_URL = "http://www.orcid.org/ns/personal-details";
private static final String NS_DETAILS = "personal-details";
private static final String NS_OTHER_URL = "http://www.orcid.org/ns/other-name";
private static final String NS_OTHER = "other-name";
private static final String NS_RECORD_URL = "http://www.orcid.org/ns/record";
private static final String NS_RECORD = "record";
private static final String NS_ERROR_URL = "http://www.orcid.org/ns/error";
private static final String NS_ACTIVITIES = "activities";
private static final String NS_ACTIVITIES_URL = "http://www.orcid.org/ns/activities";
private static final String NS_WORK = "work";
private static final String NS_WORK_URL = "http://www.orcid.org/ns/work";
private static final String NS_ERROR = "error";
private static final String NS_HISTORY = "history";
private static final String NS_HISTORY_URL = "http://www.orcid.org/ns/history";
private static final String NS_BULK_URL = "http://www.orcid.org/ns/bulk";
private static final String NS_BULK = "bulk";
private static final String NS_EXTERNAL = "external-identifier";
private static final String NS_EXTERNAL_URL = "http://www.orcid.org/ns/external-identifier";
/**
 * Parses the given XML (namespace aware) and prepares the navigator and the XPath evaluator used
 * by the parse methods, registering the namespace prefixes the XPath expressions rely on.
 *
 * The string is converted to bytes as UTF-8 instead of the platform default charset, so that the
 * result does not depend on the JVM configuration.
 * NOTE(review): this assumes the records are UTF-8 XML documents - confirm.
 *
 * @param xml the XML document to parse
 * @throws ParseException if the document is not well formed
 */
private void generateParsedDocument(final String xml) throws ParseException {
	final VTDGen vg = new VTDGen();
	vg.setDoc(xml.getBytes(java.nio.charset.StandardCharsets.UTF_8));
	vg.parse(true);
	this.vn = vg.getNav();
	this.ap = new AutoPilot(vn);
	ap.declareXPathNameSpace(NS_COMMON, NS_COMMON_URL);
	ap.declareXPathNameSpace(NS_PERSON, NS_PERSON_URL);
	ap.declareXPathNameSpace(NS_DETAILS, NS_DETAILS_URL);
	ap.declareXPathNameSpace(NS_OTHER, NS_OTHER_URL);
	ap.declareXPathNameSpace(NS_RECORD, NS_RECORD_URL);
	ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL);
	ap.declareXPathNameSpace(NS_HISTORY, NS_HISTORY_URL);
	ap.declareXPathNameSpace(NS_WORK, NS_WORK_URL);
	ap.declareXPathNameSpace(NS_EXTERNAL, NS_EXTERNAL_URL);
	ap.declareXPathNameSpace(NS_ACTIVITIES, NS_ACTIVITIES_URL);
}
/**
 * Parses an ORCID record summary and maps it onto an {@link Author}.
 * <p>
 * The ORCID iD is taken from the {@code path} attribute of {@code record:record}; names,
 * biography, other names and external identifiers are read from the rest of the document.
 *
 * @param xml the XML of the ORCID summary record
 * @return the populated author, or {@code null} when no {@code record:record} element is
 *         found or when an exception is raised while parsing
 */
public Author parseSummary(final String xml) {
	try {
		final Author author = new Author();
		generateParsedDocument(xml);
		List<VtdUtilityParser.Node> recordNodes = VtdUtilityParser
			.getTextValuesWithAttributes(
				ap, vn, "//record:record", Arrays.asList("path"));
		if (recordNodes.isEmpty()) {
			return null;
		}
		// drop the first character of the path attribute (presumably the leading '/')
		final String oid = (recordNodes.get(0).getAttributes().get("path")).substring(1);
		author.setOrcid(oid);

		List<VtdUtilityParser.Node> personNodes = VtdUtilityParser
			.getTextValuesWithAttributes(
				ap, vn, "//person:name", Arrays.asList("visibility"));
		// NOTE: when no person:name element exists get(0) throws; the record is then
		// discarded by the catch block below (behaviour kept as in the original code).
		final String visibility = (personNodes.get(0).getAttributes().get("visibility"));
		author.setVisibility(visibility);

		final String name = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:given-names");
		author.setGivenName(name);
		final String surnames = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:family-name");
		author.setFamilyName(surnames);
		final String creditNames = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:credit-name");
		author.setCreditName(creditNames);
		final String biography = VtdUtilityParser
			.getSingleValue(ap, vn, "//person:biography/personal-details:content");
		author.setBiography(biography);

		final List<String> otherNames = VtdUtilityParser.getTextValue(ap, vn, "//other-name:content");
		if (!otherNames.isEmpty()) {
			author.setOtherNames(otherNames);
		}

		// one Pid for every external identifier: type -> schema, value -> value
		ap.selectXPath("//external-identifier:external-identifier");
		while (ap.evalXPath() != -1) {
			final Pid pid = new Pid();
			final AutoPilot ap1 = new AutoPilot(ap.getNav());
			ap1.selectXPath("./common:external-id-type");
			while (ap1.evalXPath() != -1) {
				int it = vn.getText();
				pid.setSchema(vn.toNormalizedString(it));
			}
			ap1.selectXPath("./common:external-id-value");
			while (ap1.evalXPath() != -1) {
				int it = vn.getText();
				pid.setValue(vn.toNormalizedString(it));
			}
			author.addOtherPid(pid);
		}
		return author;
	} catch (Exception e) {
		// Catch Exception rather than Throwable so that JVM errors (e.g. OutOfMemoryError)
		// are not silently turned into a null result; pass the exception to the logger
		// so that the stack trace is not lost.
		log.error("Error on parsing {}", xml, e);
		return null;
	}
}
/**
 * Parses an ORCID work activity and maps it onto a {@link Work}.
 * <p>
 * The owner's ORCID iD is the second "/"-separated token of the {@code path} attribute of
 * {@code work:work}; every {@code common:external-id} becomes a {@link Pid}.
 *
 * @param xml the XML of the ORCID work
 * @return the populated work, or {@code null} when no {@code work:work} element is found
 *         or when an exception is raised while parsing
 */
public Work parseWork(final String xml) {
	try {
		final Work work = new Work();
		generateParsedDocument(xml);
		List<VtdUtilityParser.Node> workNodes = VtdUtilityParser
			.getTextValuesWithAttributes(ap, vn, "//work:work", Arrays.asList("path", "visibility"));
		if (workNodes.isEmpty()) {
			return null;
		}
		// e.g. "/0000-0001-5010-5001/work/26448226" -> "0000-0001-5010-5001"
		final String oid = (workNodes.get(0).getAttributes().get("path")).split("/")[1];
		work.setOrcid(oid);

		// one Pid for every external identifier: type -> schema, value -> value
		ap.selectXPath("//common:external-id");
		while (ap.evalXPath() != -1) {
			final Pid pid = new Pid();
			final AutoPilot ap1 = new AutoPilot(ap.getNav());
			ap1.selectXPath("./common:external-id-type");
			while (ap1.evalXPath() != -1) {
				int it = vn.getText();
				pid.setSchema(vn.toNormalizedString(it));
			}
			ap1.selectXPath("./common:external-id-value");
			while (ap1.evalXPath() != -1) {
				int it = vn.getText();
				pid.setValue(vn.toNormalizedString(it));
			}
			work.addPid(pid);
		}

		work.setTitle(VtdUtilityParser.getSingleValue(ap, vn, "//work:title/common:title"));
		return work;
	} catch (Exception e) {
		// Catch Exception rather than Throwable so that JVM errors (e.g. OutOfMemoryError)
		// are not silently turned into a null result; pass the exception to the logger
		// so that the stack trace is not lost.
		log.error("Error on parsing {}", xml, e);
		return null;
	}
}
/**
 * Builds a date string from the year, month and day children of the nodes selected by the
 * given XPath. The year is appended as is, month and day are each prefixed by "-"; parts
 * that are missing in the document are simply skipped.
 *
 * @param xpath the XPath selecting the date element(s) in the current document
 * @return the concatenated date, or an empty string when nothing matches
 * @throws Exception if the XPath cannot be compiled or evaluated
 */
private String extractEmploymentDate(final String xpath) throws Exception {
	// evaluated in this order; every part but the first is preceded by a dash
	final String[] partPaths = {
		"./common:year", "./common:month", "./common:day"
	};
	final StringBuilder date = new StringBuilder();
	ap.selectXPath(xpath);
	while (ap.evalXPath() != -1) {
		final AutoPilot partPilot = new AutoPilot(ap.getNav());
		for (int i = 0; i < partPaths.length; i++) {
			partPilot.selectXPath(partPaths[i]);
			while (partPilot.evalXPath() != -1) {
				final int textIndex = vn.getText();
				if (i > 0) {
					date.append("-");
				}
				date.append(vn.toNormalizedString(textIndex));
			}
		}
	}
	return date.toString();
}
/**
 * Parses an ORCID employment activity and maps it onto an {@link Employment}.
 * <p>
 * The ORCID iD is read from {@code common:source-orcid/common:path}. The department name
 * falls back to the organization name when no department is given.
 *
 * @param xml the XML of the ORCID employment
 * @return the populated employment, or {@code null} when the ORCID iD is blank or when an
 *         exception is raised while parsing
 */
public Employment parseEmployment(final String xml) {
	try {
		final Employment employment = new Employment();
		generateParsedDocument(xml);
		final String oid = VtdUtilityParser
			.getSingleValue(ap, vn, "//common:source-orcid/common:path");
		if (StringUtils.isBlank(oid)) {
			return null;
		}
		employment.setOrcid(oid);

		final String depName = VtdUtilityParser
			.getSingleValue(ap, vn, "//common:department-name");
		final String rolTitle = VtdUtilityParser
			.getSingleValue(ap, vn, "//common:role-title");
		if (StringUtils.isNotBlank(rolTitle)) {
			employment.setRoleTitle(rolTitle);
		}
		if (StringUtils.isNotBlank(depName)) {
			employment.setDepartmentName(depName);
		} else {
			// no department: use the name of the organization instead
			employment
				.setDepartmentName(
					VtdUtilityParser
						.getSingleValue(ap, vn, "//common:organization/common:name"));
		}

		employment.setStartDate(extractEmploymentDate("//common:start-date"));
		employment.setEndDate(extractEmploymentDate("//common:end-date"));

		final String affiliationId = VtdUtilityParser
			.getSingleValue(ap, vn, "//common:disambiguated-organization-identifier");
		final String affiliationIdType = VtdUtilityParser
			.getSingleValue(ap, vn, "//common:disambiguation-source");
		// identifier -> Pid value, disambiguation source -> Pid schema
		if (StringUtils.isNotBlank(affiliationId) || StringUtils.isNotBlank(affiliationIdType)) {
			employment.setAffiliationId(new Pid(affiliationId, affiliationIdType));
		}
		return employment;
	} catch (Exception e) {
		// Catch Exception rather than Throwable so that JVM errors (e.g. OutOfMemoryError)
		// are not silently turned into a null result; pass the exception to the logger
		// so that the stack trace is not lost.
		log.error("Error on parsing {}", xml, e);
		return null;
	}
}
}

View File

@ -0,0 +1,83 @@
package eu.dnetlib.dhp.collection.orcid.model;
import java.util.ArrayList;
import java.util.List;
/**
 * Bean describing the person section of an ORCID record: names, biography, the visibility of
 * the name and the additional persistent identifiers attached to the profile.
 */
public class Author extends ORCIDItem {

	private String givenName;
	private String familyName;
	private String creditName;
	private List<String> otherNames;
	private String visibility;
	private String biography;
	private List<Pid> otherPids;

	public String getGivenName() {
		return this.givenName;
	}

	public void setGivenName(final String givenName) {
		this.givenName = givenName;
	}

	public String getFamilyName() {
		return this.familyName;
	}

	public void setFamilyName(final String familyName) {
		this.familyName = familyName;
	}

	public String getCreditName() {
		return this.creditName;
	}

	public void setCreditName(final String creditName) {
		this.creditName = creditName;
	}

	public List<String> getOtherNames() {
		return this.otherNames;
	}

	public void setOtherNames(final List<String> otherNames) {
		this.otherNames = otherNames;
	}

	public String getVisibility() {
		return this.visibility;
	}

	public void setVisibility(final String visibility) {
		this.visibility = visibility;
	}

	public String getBiography() {
		return this.biography;
	}

	public void setBiography(final String biography) {
		this.biography = biography;
	}

	public List<Pid> getOtherPids() {
		return this.otherPids;
	}

	public void setOtherPids(final List<Pid> otherPids) {
		this.otherPids = otherPids;
	}

	/**
	 * Appends an identifier to the list of other pids, creating the list on first use.
	 *
	 * @param pid the identifier to add
	 */
	public void addOtherPid(final Pid pid) {
		if (this.otherPids == null) {
			this.otherPids = new ArrayList<>();
		}
		this.otherPids.add(pid);
	}
}

View File

@ -0,0 +1,54 @@
package eu.dnetlib.dhp.collection.orcid.model;
/**
 * Bean describing an employment entry of an ORCID profile: period, affiliation identifier,
 * department name and role title.
 */
public class Employment extends ORCIDItem {

	private String startDate;
	// renamed from "EndDate" to follow Java field naming; the accessors, and therefore the
	// bean property name "endDate", are unchanged
	private String endDate;
	private Pid affiliationId;
	private String departmentName;
	private String roleTitle;

	public String getStartDate() {
		return startDate;
	}

	public void setStartDate(String startDate) {
		this.startDate = startDate;
	}

	public String getEndDate() {
		return endDate;
	}

	public void setEndDate(String endDate) {
		this.endDate = endDate;
	}

	public Pid getAffiliationId() {
		return affiliationId;
	}

	public void setAffiliationId(Pid affiliationId) {
		this.affiliationId = affiliationId;
	}

	public String getDepartmentName() {
		return departmentName;
	}

	public void setDepartmentName(String departmentName) {
		this.departmentName = departmentName;
	}

	public String getRoleTitle() {
		return roleTitle;
	}

	public void setRoleTitle(String roleTitle) {
		this.roleTitle = roleTitle;
	}
}

View File

@ -0,0 +1,14 @@
package eu.dnetlib.dhp.collection.orcid.model;
/**
 * Base bean of the ORCID model classes: it only carries the ORCID iD the item refers to.
 */
public class ORCIDItem {

	private String orcid;

	/**
	 * @param orcid the ORCID iD to associate with this item
	 */
	public void setOrcid(final String orcid) {
		this.orcid = orcid;
	}

	/**
	 * @return the ORCID iD associated with this item, {@code null} if never set
	 */
	public String getOrcid() {
		return this.orcid;
	}
}

View File

@ -0,0 +1,33 @@
package eu.dnetlib.dhp.collection.orcid.model;
/**
 * Bean holding a persistent identifier as a (value, schema) pair.
 */
public class Pid {

	private String schema;
	private String value;

	/** Creates an empty identifier; value and schema are left {@code null}. */
	public Pid() {
	}

	/**
	 * @param value  the identifier value
	 * @param schema the schema (type) of the identifier
	 */
	public Pid(final String value, final String schema) {
		this.schema = schema;
		this.value = value;
	}

	public String getSchema() {
		return this.schema;
	}

	public void setSchema(final String schema) {
		this.schema = schema;
	}

	public String getValue() {
		return this.value;
	}

	public void setValue(final String value) {
		this.value = value;
	}
}

View File

@ -0,0 +1,35 @@
package eu.dnetlib.dhp.collection.orcid.model;
import java.util.ArrayList;
import java.util.List;
/**
 * Bean describing a work of an ORCID profile: its title and its persistent identifiers.
 */
public class Work extends ORCIDItem {

	private List<Pid> pids;
	private String title;

	public List<Pid> getPids() {
		return this.pids;
	}

	public void setPids(final List<Pid> pids) {
		this.pids = pids;
	}

	public String getTitle() {
		return this.title;
	}

	public void setTitle(final String title) {
		this.title = title;
	}

	/**
	 * Appends an identifier to the list of pids, creating the list on first use.
	 *
	 * @param pid the identifier to add
	 */
	public void addPid(final Pid pid) {
		if (this.pids == null) {
			this.pids = new ArrayList<>();
		}
		this.pids.add(pid);
	}
}

View File

@ -0,0 +1,21 @@
[
{
"paramName": "n",
"paramLongName": "namenode",
"paramDescription": "the Name Node URI",
"paramRequired": true
},
{
"paramName": "t",
"paramLongName": "targetPath",
"paramDescription": "the target PATH where the files are downloaded",
"paramRequired": true
},
{
"paramName": "a",
"paramLongName": "apiURL",
"paramDescription": "the FIGSHARE API id URL to retrieve all the dump files",
"paramRequired": true
}
]

View File

@ -0,0 +1,21 @@
[
{
"paramName": "n",
"paramLongName": "namenode",
"paramDescription": "the Name Node URI",
"paramRequired": true
},
{
"paramName": "t",
"paramLongName": "targetPath",
"paramDescription": "the target PATH to extract files",
"paramRequired": true
},
{
"paramName": "s",
"paramLongName": "sourcePath",
"paramDescription": "the PATH where the tar.gz files were downloaded",
"paramRequired": true
}
]

View File

@ -0,0 +1,21 @@
[
{
"paramName": "m",
"paramLongName": "master",
"paramDescription": "the master name",
"paramRequired": true
},
{
"paramName": "t",
"paramLongName": "targetPath",
"paramDescription": "the target PATH of the DF tables",
"paramRequired": true
},
{
"paramName": "s",
"paramLongName": "sourcePath",
"paramDescription": "the PATH of the ORCID sequence file",
"paramRequired": true
}
]

View File

@ -0,0 +1,23 @@
<configuration>
<!-- Default job properties: job tracker and name node of the cluster -->
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<!-- Oozie settings: use the system lib path and the spark2 share lib for Spark actions -->
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
<!-- Launcher setting: user classpath first -->
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

View File

@ -0,0 +1,81 @@
<workflow-app name="download_ORCID_DUMP" xmlns="uri:oozie:workflow:0.5">
    <!-- Chain of actions: DownloadDUMP -> extractDump -> generateTables -->
    <parameters>
        <property>
            <name>targetPath</name>
            <description>the path to store the original ORCID dump</description>
        </property>
        <property>
            <name>apiURL</name>
            <description>The figshare API URL to retrieve the list file to download</description>
        </property>
    </parameters>

    <!-- Start from the download step: starting at generateTables left DownloadDUMP and
         extractDump unreachable and the apiURL parameter unused. -->
    <start to="DownloadDUMP"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <!-- Downloads the dump files listed by the figshare API into targetPath -->
    <action name="DownloadDUMP">
        <java>
            <configuration>
                <property>
                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
                    <value>true</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.dhp.collection.orcid.DownloadORCIDDumpApplication</main-class>
            <arg>--namenode</arg><arg>${nameNode}</arg>
            <arg>--targetPath</arg><arg>${targetPath}</arg>
            <arg>--apiURL</arg><arg>${apiURL}</arg>
        </java>
        <ok to="extractDump"/>
        <error to="Kill"/>
    </action>

    <!-- Extracts the downloaded files from targetPath into targetPath/extracted -->
    <action name="extractDump">
        <java>
            <configuration>
                <property>
                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
                    <value>true</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.dhp.collection.orcid.ExtractORCIDDump</main-class>
            <java-opts> -Xmx6g </java-opts>
            <arg>--namenode</arg><arg>${nameNode}</arg>
            <arg>--sourcePath</arg><arg>${targetPath}</arg>
            <arg>--targetPath</arg><arg>${targetPath}/extracted</arg>
        </java>
        <ok to="generateTables"/>
        <error to="Kill"/>
    </action>

    <!-- Generates the ORCID tables under targetPath/tables from the extracted files -->
    <action name="generateTables">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Generate ORCID Tables</name>
            <class>eu.dnetlib.dhp.collection.orcid.SparkGenerateORCIDTable</class>
            <jar>dhp-aggregation-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.executor.memoryOverhead=2g
                --conf spark.sql.shuffle.partitions=3000
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${targetPath}/extracted</arg>
            <arg>--targetPath</arg><arg>${targetPath}/tables</arg>
            <arg>--master</arg><arg>yarn</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

    <end name="End"/>
</workflow-app>

View File

@ -0,0 +1,21 @@
[
{
"paramName": "n",
"paramLongName": "namenode",
"paramDescription": "the Name Node URI",
"paramRequired": true
},
{
"paramName": "t",
"paramLongName": "targetPath",
"paramDescription": "the target PATH where the files are downloaded",
"paramRequired": true
},
{
"paramName": "a",
"paramLongName": "apiURL",
"paramDescription": "the FIGSHARE API id URL to retrieve all the dump files",
"paramRequired": true
}
]

View File

@ -0,0 +1,101 @@
package eu.dnetlib.dhp.collection.orcid
import eu.dnetlib.dhp.application.AbstractScalaApplication
import eu.dnetlib.dhp.collection.orcid.model.{Author, Employment, Pid, Work}
import org.apache.hadoop.io.Text
import org.apache.spark.SparkContext
import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
import org.slf4j.{Logger, LoggerFactory}
/** Spark application that reads the ORCID sequence file (key/value pairs of text) and writes
  * one table per entity type (Authors, Employments, Works) under the target path.
  */
class SparkGenerateORCIDTable(propertyPath: String, args: Array[String], log: Logger)
    extends AbstractScalaApplication(propertyPath, args, log: Logger) {

  /** Here all the spark applications runs this method
    * where the whole logic of the spark node is defined
    */
  override def run(): Unit = {
    val sourcePath: String = parser.get("sourcePath")
    log.info("found parameters sourcePath: {}", sourcePath)
    val targetPath: String = parser.get("targetPath")
    log.info("found parameters targetPath: {}", targetPath)
    extractORCIDTable(spark, sourcePath, targetPath)
    extractORCIDEmploymentsTable(spark, sourcePath, targetPath)
    extractORCIDWorksTable(spark, sourcePath, targetPath)
  }

  /** Shared pipeline of the three extractions: reads the sequence file, keeps the records
    * whose key contains `keyMarker`, parses the value with a fresh [[OrcidParser]], drops the
    * records the parser rejected (null) and overwrites `targetPath/tableName` with the result.
    *
    * The closures only capture local values (never `this`), so they can be shipped to the
    * executors.
    *
    * @param spark      the active Spark session
    * @param sourcePath the path of the ORCID sequence file
    * @param targetPath the base path of the generated tables
    * @param keyMarker  the substring a record key must contain to be selected
    * @param tableName  the name of the folder written under `targetPath`
    * @param parse      how to turn the XML of a record into an item (may return null)
    * @param encoder    the encoder of the generated items
    */
  private def extractTable[T <: AnyRef](
    spark: SparkSession,
    sourcePath: String,
    targetPath: String,
    keyMarker: String,
    tableName: String
  )(parse: (OrcidParser, String) => T)(implicit encoder: Encoder[T]): Unit = {
    import spark.implicits._
    spark.sparkContext
      .sequenceFile(sourcePath, classOf[Text], classOf[Text])
      .map { case (key, value) => (key.toString, value.toString) }
      .toDF
      .as[(String, String)]
      .filter(r => r._1.contains(keyMarker))
      .map(r => parse(new OrcidParser, r._2))
      .filter(item => item != null)
      .write
      .mode(SaveMode.Overwrite)
      .save(s"$targetPath/$tableName")
  }

  /** Writes the `Authors` table from the records whose key contains "summaries". */
  def extractORCIDTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit =
    extractTable[Author](spark, sourcePath, targetPath, "summaries", "Authors")((p, xml) => p.parseSummary(xml))(
      Encoders.bean(classOf[Author])
    )

  /** Writes the `Works` table from the records whose key contains "works". */
  def extractORCIDWorksTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit =
    extractTable[Work](spark, sourcePath, targetPath, "works", "Works")((p, xml) => p.parseWork(xml))(
      Encoders.bean(classOf[Work])
    )

  /** Writes the `Employments` table from the records whose key contains "employments". */
  def extractORCIDEmploymentsTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit =
    extractTable[Employment](spark, sourcePath, targetPath, "employments", "Employments")((p, xml) =>
      p.parseEmployment(xml)
    )(Encoders.bean(classOf[Employment]))
}
/** Companion object holding the logger and the entry point of the job. */
object SparkGenerateORCIDTable {

  val log: Logger = LoggerFactory.getLogger(SparkGenerateORCIDTable.getClass)

  /** Builds the application with its parameter specification, initializes it and runs it.
    *
    * @param args the command line arguments of the job
    */
  def main(args: Array[String]): Unit = {
    val application =
      new SparkGenerateORCIDTable("/eu/dnetlib/dhp/collection/orcid/generate_orcid_table_parameter.json", args, log)
    application.initialize().run()
  }
}

View File

@ -176,7 +176,8 @@ object DataciteToOAFTransformation {
vocabularies.getSynonymAsQualifier(
ModelConstants.DNET_RESULT_TYPOLOGIES,
typeQualifier.getClassid
), resourceType
),
resourceType
)
}
if (schemaOrg != null && schemaOrg.nonEmpty) {
@ -188,7 +189,8 @@ object DataciteToOAFTransformation {
vocabularies.getSynonymAsQualifier(
ModelConstants.DNET_RESULT_TYPOLOGIES,
typeQualifier.getClassid
), schemaOrg
),
schemaOrg
)
}
@ -203,7 +205,8 @@ object DataciteToOAFTransformation {
vocabularies.getSynonymAsQualifier(
ModelConstants.DNET_RESULT_TYPOLOGIES,
typeQualifier.getClassid
), resourceTypeGeneral
),
resourceTypeGeneral
)
}
@ -228,7 +231,6 @@ object DataciteToOAFTransformation {
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
i.setInstanceTypeMapping(List(itm).asJava)
typeQualifiers._2.getClassname match {
case "dataset" =>
val r = new OafDataset

View File

@ -593,7 +593,6 @@ object BioDBToOAF {
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
i.setInstanceTypeMapping(List(itm).asJava)
i.setCollectedfrom(collectedFromMap("ebi"))
d.setInstance(List(i).asJava)
i.setDateofacceptance(

View File

@ -195,7 +195,7 @@ object PubMedToOaf {
pubmedInstance.setInstanceTypeMapping(List(itm).asJava)
} else {
val i_type = article.getPublicationTypes.asScala
.map(s => (s.getValue,getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)))
.map(s => (s.getValue, getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)))
.find(q => q._2 != null)
if (i_type.isDefined) {
@ -205,8 +205,7 @@ object PubMedToOaf {
itm.setOriginalType(i_type.get._1)
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
pubmedInstance.setInstanceTypeMapping(List(itm).asJava)
}
else
} else
return null
}
val result = createResult(pubmedInstance.getInstancetype, vocabularies)

View File

@ -0,0 +1,119 @@
package eu.dnetlib.dhp.collection.orcid;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.Text;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.ximpleware.NavException;
import com.ximpleware.ParseException;
import com.ximpleware.XPathEvalException;
import com.ximpleware.XPathParseException;
import eu.dnetlib.dhp.collection.orcid.model.Author;
import eu.dnetlib.dhp.collection.orcid.model.ORCIDItem;
import eu.dnetlib.dhp.parser.utility.VtdException;
/**
 * Smoke tests of {@link OrcidParser}: each test parses sample ORCID XML files taken from the
 * test resources and prints the JSON serialization of the parsed item.
 * <p>
 * NOTE(review): the tests only print the result; since the parser returns {@code null} on
 * failure, consider adding assertions on the parsed values.
 */
public class DownloadORCIDTest {
	private final Logger log = LoggerFactory.getLogger(DownloadORCIDTest.class);

	/**
	 * Loads a classpath resource as a UTF-8 string, closing the underlying stream.
	 *
	 * @param path the absolute classpath location of the resource
	 * @return the content of the resource
	 * @throws IOException if the resource cannot be read
	 * @throws NullPointerException if the resource does not exist
	 */
	private String readResource(final String path) throws IOException {
		try (java.io.InputStream in = Objects
			.requireNonNull(getClass().getResourceAsStream(path), "missing test resource: " + path)) {
			// explicit encoding: the sample files are declared as UTF-8
			return IOUtils.toString(in, "UTF-8");
		}
	}

	@Test
	public void testSummary() throws Exception {
		final String xml = readResource("/eu/dnetlib/dhp/collection/orcid/summary.xml");

		final OrcidParser parser = new OrcidParser();
		ORCIDItem orcidItem = parser.parseSummary(xml);

		final ObjectMapper mapper = new ObjectMapper();
		System.out.println(mapper.writeValueAsString(orcidItem));
	}

	@Test
	public void testParsingWork() throws Exception {
		final List<String> workPaths = Arrays
			.asList(
				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-2536-4498.xml",
				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-5982-8983.xml",
				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191.xml",
				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191-similarity.xml",
				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191_contributors.xml");

		final OrcidParser parser = new OrcidParser();
		final ObjectMapper mapper = new ObjectMapper();

		for (final String path : workPaths) {
			System.out.println(mapper.writeValueAsString(parser.parseWork(readResource(path))));
		}
	}

	@Test
	public void testParsingEmployments() throws Exception {
		final List<String> employmentPaths = Arrays
			.asList(
				"/eu/dnetlib/dhp/collection/orcid/employment.xml",
				"/eu/dnetlib/dhp/collection/orcid/employment_2.xml",
				"/eu/dnetlib/dhp/collection/orcid/employment_3.xml");

		final OrcidParser parser = new OrcidParser();
		final ObjectMapper mapper = new ObjectMapper();

		for (final String path : employmentPaths) {
			System.out.println(mapper.writeValueAsString(parser.parseEmployment(readResource(path))));
		}
	}
}

View File

@ -0,0 +1,69 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<work:work xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history" xmlns:employment="http://www.orcid.org/ns/employment" xmlns:education="http://www.orcid.org/ns/education" xmlns:other-name="http://www.orcid.org/ns/other-name" xmlns:deprecated="http://www.orcid.org/ns/deprecated" xmlns:funding="http://www.orcid.org/ns/funding" xmlns:research-resource="http://www.orcid.org/ns/research-resource" xmlns:service="http://www.orcid.org/ns/service" xmlns:researcher-url="http://www.orcid.org/ns/researcher-url" xmlns:distinction="http://www.orcid.org/ns/distinction" xmlns:internal="http://www.orcid.org/ns/internal" xmlns:membership="http://www.orcid.org/ns/membership" xmlns:person="http://www.orcid.org/ns/person" xmlns:personal-details="http://www.orcid.org/ns/personal-details" xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common" xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword" xmlns:activities="http://www.orcid.org/ns/activities" xmlns:qualification="http://www.orcid.org/ns/qualification" xmlns:external-identifier="http://www.orcid.org/ns/external-identifier" xmlns:error="http://www.orcid.org/ns/error" xmlns:preferences="http://www.orcid.org/ns/preferences" xmlns:invited-position="http://www.orcid.org/ns/invited-position" xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="26448226" path="/0000-0001-5010-5001/work/26448226" visibility="public">
<common:created-date>2016-09-01T19:22:46.768Z</common:created-date>
<common:last-modified-date>2022-05-25T03:48:56.968Z</common:last-modified-date>
<common:source>
<common:source-client-id>
<common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
<common:path>0000-0002-5982-8983</common:path>
<common:host>orcid.org</common:host>
</common:source-client-id>
<common:source-name>Scopus - Elsevier</common:source-name>
<common:assertion-origin-orcid>
<common:uri>https://orcid.org/0000-0001-5010-5001</common:uri>
<common:path>0000-0001-5010-5001</common:path>
<common:host>orcid.org</common:host>
</common:assertion-origin-orcid>
<common:assertion-origin-name>Quang Nguyen</common:assertion-origin-name>
</common:source>
<work:title>
<common:title>Vision outcomes and major complications after endovascular coil embolization of ophthalmic segment aneurysms</common:title>
</work:title>
<work:journal-title>American Journal of Neuroradiology</work:journal-title>
<work:citation>
<work:citation-type>bibtex</work:citation-type>
<work:citation-value>@article{Nguyen2014,title = {Vision outcomes and major complications after endovascular coil embolization of ophthalmic segment aneurysms},journal = {American Journal of Neuroradiology},year = {2014},volume = {35},number = {11},pages = {2140-2145},author = {Durst, C. and Starke, R.M. and Gaughen, J. and Nguyen, Q. and Patrie, J. and Jensen, M.E. and Evans, A.J.}}</work:citation-value>
</work:citation>
<work:type>journal-article</work:type>
<common:publication-date>
<common:year>2014</common:year>
</common:publication-date>
<common:external-ids>
<common:external-id>
<common:external-id-type>doi</common:external-id-type>
<common:external-id-value>10.3174/ajnr.A4032</common:external-id-value>
<common:external-id-normalized transient="true">10.3174/ajnr.a4032</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
<common:external-id>
<common:external-id-type>eid</common:external-id-type>
<common:external-id-value>2-s2.0-84911865199</common:external-id-value>
<common:external-id-normalized transient="true">2-s2.0-84911865199</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<common:url>http://www.scopus.com/inward/record.url?eid=2-s2.0-84911865199&amp;partnerID=MN8TOARS</common:url>
<work:contributors>
<work:contributor>
<work:credit-name>Durst, C.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Starke, R.M.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Gaughen, J.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Nguyen, Q.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Patrie, J.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Jensen, M.E.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Evans, A.J.</work:credit-name>
</work:contributor>
</work:contributors>
</work:work>

View File

@ -0,0 +1,79 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<work:work xmlns:address="http://www.orcid.org/ns/address"
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work"
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="50101152"
path="/0000-0001-5349-4030/work/50101152" visibility="public">
<common:created-date>2018-11-01T19:49:45.562Z</common:created-date>
<common:last-modified-date>2018-11-01T19:49:45.562Z</common:last-modified-date>
<common:source>
<common:source-client-id>
<common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
<common:path>0000-0002-5982-8983</common:path>
<common:host>orcid.org</common:host>
</common:source-client-id>
<common:source-name>Scopus - Elsevier</common:source-name>
</common:source>
<work:title>
<common:title>"Calling Out" in class: Degrees of candor in addressing social injustices in
racially homogenous and heterogeneous U.S. history classrooms</common:title>
</work:title>
<work:journal-title>Journal of Social Studies Research</work:journal-title>
<work:citation>
<work:citation-type>bibtex</work:citation-type>
<work:citation-value>@article{Massaro2018,title = {{"}Calling Out{"} in class: Degrees of
candor in addressing social injustices in racially homogenous and heterogeneous U.S.
history classrooms},journal = {Journal of Social Studies Research},year = {2018},author
= {Parkhouse, H. and Massaro, V.R.}}</work:citation-value>
</work:citation>
<work:type>journal-article</work:type>
<common:publication-date>
<common:year>2018</common:year>
</common:publication-date>
<common:external-ids>
<common:external-id>
<common:external-id-type>doi</common:external-id-type>
<common:external-id-value>10.1016/j.jssr.2018.01.004</common:external-id-value>
<common:external-id-normalized transient="true"
>10.1016/j.jssr.2018.01.004</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
<common:external-id>
<common:external-id-type>eid</common:external-id-type>
<common:external-id-value>2-s2.0-85041949043</common:external-id-value>
<common:external-id-normalized transient="true"
>2-s2.0-85041949043</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<common:url>http://www.scopus.com/inward/record.url?eid=2-s2.0-85041949043&amp;partnerID=MN8TOARS</common:url>
<work:contributors>
<work:contributor>
<work:credit-name>Parkhouse, H.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Massaro, V.R.</work:credit-name>
</work:contributor>
</work:contributors>
</work:work>

View File

@ -0,0 +1,113 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<work:work xmlns:address="http://www.orcid.org/ns/address"
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work"
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
path="/0000-0003-2760-1191/work/28776099" visibility="public">
<common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
<common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
<common:path>0000-0002-9157-3431</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Europe PubMed Central</common:source-name>
</common:source>
<work:title>
<common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
ST-Segment-Elevation Myocardial Infarction.</common:title>
</work:title>
<work:citation>
<work:citation-type>formatted-unspecified</work:citation-type>
<work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
</work:citation>
<work:type>journal-article</work:type>
<common:publication-date>
<common:year>2016</common:year>
<common:month>11</common:month>
</common:publication-date>
<common:external-ids>
<common:external-id>
<common:external-id-type>pmid</common:external-id-type>
<common:external-id-value>27899851</common:external-id-value>
<common:external-id-normalized transient="true">27899851</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
<common:external-id>
<common:external-id-type>pmc</common:external-id-type>
<common:external-id-value>PMC5126442</common:external-id-value>
<common:external-id-normalized transient="true"
>PMC5126442</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<common:url>http://europepmc.org/abstract/med/27899851</common:url>
<work:contributors>
<work:contributor>
<work:credit-name>Abdel-Dayem K</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Abdel-Dayem Fake</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Eweda II</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>El-Sherbiny A</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Dimitry MO</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Nammas W</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
</work:contributors>
</work:work>

View File

@ -0,0 +1,106 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<work:work xmlns:address="http://www.orcid.org/ns/address"
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work"
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
path="/0000-0003-2760-1191/work/28776099" visibility="public">
<common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
<common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
<common:path>0000-0002-9157-3431</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Europe PubMed Central</common:source-name>
</common:source>
<work:title>
<common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
ST-Segment-Elevation Myocardial Infarction.</common:title>
</work:title>
<work:citation>
<work:citation-type>formatted-unspecified</work:citation-type>
<work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
</work:citation>
<work:type>journal-article</work:type>
<common:publication-date>
<common:year>2016</common:year>
<common:month>11</common:month>
</common:publication-date>
<common:external-ids>
<common:external-id>
<common:external-id-type>pmid</common:external-id-type>
<common:external-id-value>27899851</common:external-id-value>
<common:external-id-normalized transient="true">27899851</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
<common:external-id>
<common:external-id-type>pmc</common:external-id-type>
<common:external-id-value>PMC5126442</common:external-id-value>
<common:external-id-normalized transient="true"
>PMC5126442</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<common:url>http://europepmc.org/abstract/med/27899851</common:url>
<work:contributors>
<work:contributor>
<work:credit-name>Khair Abde Daye</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Eweda II</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>El-Sherbiny A</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Dimitry MO</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Nammas W</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
</work:contributors>
</work:work>

View File

@ -0,0 +1,101 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<work:work xmlns:address="http://www.orcid.org/ns/address"
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work"
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
path="/0000-0003-2760-1191/work/28776099" visibility="public">
<common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
<common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
<common:path>0000-0002-9157-3431</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Europe PubMed Central</common:source-name>
</common:source>
<work:title>
<common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
ST-Segment-Elevation Myocardial Infarction.</common:title>
</work:title>
<work:citation>
<work:citation-type>formatted-unspecified</work:citation-type>
<work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
</work:citation>
<work:type>journal-article</work:type>
<common:publication-date>
<common:year>2016</common:year>
<common:month>11</common:month>
</common:publication-date>
<common:external-ids>
<common:external-id>
<common:external-id-type>pmid</common:external-id-type>
<common:external-id-value>27899851</common:external-id-value>
<common:external-id-normalized transient="true">27899851</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
<common:external-id>
<common:external-id-type>pmc</common:external-id-type>
<common:external-id-value>PMC5126442</common:external-id-value>
<common:external-id-normalized transient="true"
>PMC5126442</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<common:url>http://europepmc.org/abstract/med/27899851</common:url>
<work:contributors>
<work:contributor>
<work:contributor-attributes>
<work:contributor-sequence>seq0</work:contributor-sequence>
<work:contributor-role>role0</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>creditname1</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>creditname2</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>seq2</work:contributor-sequence>
<work:contributor-role></work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>creditname3</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence></work:contributor-sequence>
<work:contributor-role>role3</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name></work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>seq4</work:contributor-sequence>
<work:contributor-role>role4</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
</work:contributors>
</work:work>

View File

@ -0,0 +1,50 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
put-code="2205087" path="/0000-0001-5010-5001/employment/2205087" display-index="0"
visibility="public">
<common:created-date>2016-09-01T19:21:05.791Z</common:created-date>
<common:last-modified-date>2016-09-01T19:21:05.791Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5010-5001</common:uri>
<common:path>0000-0001-5010-5001</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Quang Nguyen</common:source-name>
</common:source>
<common:organization>
<common:name>Beth Israel Deaconess Medical Center</common:name>
<common:address>
<common:city>Boston</common:city>
<common:region>MA</common:region>
<common:country>US</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>1859</common:disambiguated-organization-identifier>
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</employment:employment>

View File

@ -0,0 +1,55 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
put-code="6364960" path="/0000-0001-5011-3001/employment/6364960" display-index="1"
visibility="public">
<common:created-date>2018-09-03T01:46:19.474Z</common:created-date>
<common:last-modified-date>2018-09-03T01:46:19.474Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5011-3001</common:uri>
<common:path>0000-0001-5011-3001</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>zhengyan li</common:source-name>
</common:source>
<common:start-date>
<common:year>2008</common:year>
<common:month>09</common:month>
<common:day>01</common:day>
</common:start-date>
<common:organization>
<common:name>Anhui Academy of Agricultural Sciences</common:name>
<common:address>
<common:city>Hefei</common:city>
<common:region>Anhui</common:region>
<common:country>CN</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>125385</common:disambiguated-organization-identifier>
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</employment:employment>

View File

@ -0,0 +1,62 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
put-code="7210424" path="/0000-0001-5022-8001/employment/7210424" display-index="1"
visibility="public">
<common:created-date>2021-03-11T14:48:29.603Z</common:created-date>
<common:last-modified-date>2021-03-11T14:48:29.603Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5012-1001</common:uri>
<common:path>0000-0001-5012-1001</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Asma Bazzi</common:source-name>
</common:source>
<common:department-name>Pathology and Laboratory Medicine</common:department-name>
<common:role-title>Medical Laboratory Technologist</common:role-title>
<common:start-date>
<common:year>1994</common:year>
<common:month>10</common:month>
<common:day>01</common:day>
</common:start-date>
<common:end-date>
<common:year>2000</common:year>
<common:month>06</common:month>
<common:day>30</common:day>
</common:end-date>
<common:organization>
<common:name>American University of Beirut</common:name>
<common:address>
<common:city>Hamra</common:city>
<common:region>Beirut</common:region>
<common:country>LB</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>11238</common:disambiguated-organization-identifier>
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</employment:employment>

View File

@ -0,0 +1,581 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<record:record xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history" xmlns:employment="http://www.orcid.org/ns/employment" xmlns:education="http://www.orcid.org/ns/education" xmlns:other-name="http://www.orcid.org/ns/other-name" xmlns:deprecated="http://www.orcid.org/ns/deprecated" xmlns:funding="http://www.orcid.org/ns/funding" xmlns:research-resource="http://www.orcid.org/ns/research-resource" xmlns:service="http://www.orcid.org/ns/service" xmlns:researcher-url="http://www.orcid.org/ns/researcher-url" xmlns:distinction="http://www.orcid.org/ns/distinction" xmlns:internal="http://www.orcid.org/ns/internal" xmlns:membership="http://www.orcid.org/ns/membership" xmlns:person="http://www.orcid.org/ns/person" xmlns:personal-details="http://www.orcid.org/ns/personal-details" xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common" xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword" xmlns:activities="http://www.orcid.org/ns/activities" xmlns:qualification="http://www.orcid.org/ns/qualification" xmlns:external-identifier="http://www.orcid.org/ns/external-identifier" xmlns:error="http://www.orcid.org/ns/error" xmlns:preferences="http://www.orcid.org/ns/preferences" xmlns:invited-position="http://www.orcid.org/ns/invited-position" xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review" path="/0000-0001-5045-1000">
<common:orcid-identifier>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:orcid-identifier>
<preferences:preferences>
<preferences:locale>es</preferences:locale>
</preferences:preferences>
<history:history>
<history:creation-method>Direct</history:creation-method>
<history:submission-date>2023-01-17T23:50:40.215Z</history:submission-date>
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
<history:claimed>true</history:claimed>
<history:verified-email>true</history:verified-email>
<history:verified-primary-email>true</history:verified-primary-email>
</history:history>
<person:person path="/0000-0001-5045-1000/person">
<person:name visibility="public" path="0000-0001-5045-1000">
<common:created-date>2023-01-17T23:50:40.472Z</common:created-date>
<common:last-modified-date>2023-01-17T23:50:40.472Z</common:last-modified-date>
<personal-details:given-names>Patricio</personal-details:given-names>
<personal-details:family-name>Sánchez Quinchuela</personal-details:family-name>
</person:name>
<other-name:other-names path="/0000-0001-5045-1000/other-names"/>
<person:biography visibility="public" path="/0000-0001-5045-1000/biography">
<common:created-date>2023-01-19T13:47:33.653Z</common:created-date>
<common:last-modified-date>2023-01-19T13:47:33.653Z</common:last-modified-date>
<personal-details:content>Especialista de vinculación con la sociedad y docente de la Universidad de las Artes. Magister en Economía Social y Solidaria por el IAEN; Magister en Proyectos Sociales y Productivos por la UNACH. Licenciado en Artes UCE. Licenciado en Castellano y Literatura por la UNACH. Doctorando del programa de Sociología de la UNED España. Larga trayectoria vinculado a las organizaciones sociales acompañando procesos de gestión cultural, formación de liderazgos y economía solidaria.</personal-details:content>
</person:biography>
<researcher-url:researcher-urls path="/0000-0001-5045-1000/researcher-urls"/>
<email:emails path="/0000-0001-5045-1000/email"/>
<address:addresses path="/0000-0001-5045-1000/address"/>
<keyword:keywords path="/0000-0001-5045-1000/keywords"/>
<external-identifier:external-identifiers path="/0000-0001-7291-3210/external-identifiers">
<common:last-modified-date>2018-02-05T23:27:36.636Z</common:last-modified-date>
<external-identifier:external-identifier put-code="134902" visibility="public" path="/0000-0001-7291-3210/external-identifiers/134902" display-index="1">
<common:created-date>2013-03-08T03:20:39.347Z</common:created-date>
<common:last-modified-date>2018-02-05T23:27:36.636Z</common:last-modified-date>
<common:source>
<common:source-client-id>
<common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
<common:path>0000-0002-5982-8983</common:path>
<common:host>orcid.org</common:host>
</common:source-client-id>
<common:source-name>Scopus - Elsevier</common:source-name>
<common:assertion-origin-orcid>
<common:uri>https://orcid.org/0000-0001-7291-3210</common:uri>
<common:path>0000-0001-7291-3210</common:path>
<common:host>orcid.org</common:host>
</common:assertion-origin-orcid>
<common:assertion-origin-name>Paolo Manghi</common:assertion-origin-name>
</common:source>
<common:external-id-type>Scopus Author ID</common:external-id-type>
<common:external-id-value>6602255248</common:external-id-value>
<common:external-id-url>http://www.scopus.com/inward/authorDetails.url?authorID=6602255248&amp;partnerID=MN8TOARS</common:external-id-url>
<common:external-id-relationship>self</common:external-id-relationship>
</external-identifier:external-identifier>
</external-identifier:external-identifiers>
</person:person>
<activities:activities-summary path="/0000-0001-5045-1000/activities">
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
<activities:distinctions path="/0000-0001-5045-1000/distinctions">
<common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
<activities:affiliation-group>
<common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
<common:external-ids/>
<distinction:distinction-summary put-code="19395146" display-index="1" path="/0000-0001-5045-1000/distinction/19395146" visibility="public">
<common:created-date>2023-01-19T13:49:48.482Z</common:created-date>
<common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Programa de Maestría</common:department-name>
<common:role-title>Becario del programa de Maestría en Economía Social y Solidaria</common:role-title>
<common:start-date>
<common:year>2014</common:year>
<common:month>10</common:month>
<common:day>20</common:day>
</common:start-date>
<common:organization>
<common:name>Instituto de Altos Estudios Nacionales</common:name>
<common:address>
<common:city>Quito</common:city>
<common:region>Pichincha</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>https://ror.org/011g3me54</common:disambiguated-organization-identifier>
<common:disambiguation-source>ROR</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</distinction:distinction-summary>
</activities:affiliation-group>
</activities:distinctions>
<activities:educations path="/0000-0001-5045-1000/educations">
<common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
<common:external-ids/>
<education:education-summary put-code="19389331" display-index="1" path="/0000-0001-5045-1000/education/19389331" visibility="public">
<common:created-date>2023-01-18T21:41:03.175Z</common:created-date>
<common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Programa de Doctorado en Sociología</common:department-name>
<common:role-title>Doctorando del Programa de Sociología</common:role-title>
<common:start-date>
<common:year>2020</common:year>
<common:month>11</common:month>
<common:day>06</common:day>
</common:start-date>
<common:organization>
<common:name>Universidad Nacional de Educación a Distancia Facultad de Ciencias Políticas y Sociología</common:name>
<common:address>
<common:city>Madrid</common:city>
<common:region>Comunidad de Madrid</common:region>
<common:country>ES</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>223339</common:disambiguated-organization-identifier>
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</education:education-summary>
</activities:affiliation-group>
</activities:educations>
<activities:employments path="/0000-0001-5045-1000/employments">
<common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:22:21.513Z</common:last-modified-date>
<common:external-ids/>
<employment:employment-summary put-code="19379757" display-index="1" path="/0000-0001-5045-1000/employment/19379757" visibility="public">
<common:created-date>2023-01-17T23:57:08.246Z</common:created-date>
<common:last-modified-date>2023-01-18T21:22:21.513Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Dirección de Vinculación con la Sociedad</common:department-name>
<common:role-title>Especialista de Proyectos y docente</common:role-title>
<common:start-date>
<common:year>2021</common:year>
<common:month>11</common:month>
<common:day>01</common:day>
</common:start-date>
<common:organization>
<common:name>Universidad de las Artes</common:name>
<common:address>
<common:city>Guayaquil</common:city>
<common:region>Guayas</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>https://ror.org/016drwn73</common:disambiguated-organization-identifier>
<common:disambiguation-source>ROR</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</employment:employment-summary>
</activities:affiliation-group>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
<common:external-ids/>
<employment:employment-summary put-code="19389234" display-index="1" path="/0000-0001-5045-1000/employment/19389234" visibility="public">
<common:created-date>2023-01-18T21:25:07.138Z</common:created-date>
<common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Dirección de Vinculación con la Sociedad</common:department-name>
<common:role-title>Director</common:role-title>
<common:start-date>
<common:year>2019</common:year>
<common:month>11</common:month>
<common:day>05</common:day>
</common:start-date>
<common:end-date>
<common:year>2021</common:year>
<common:month>10</common:month>
<common:day>31</common:day>
</common:end-date>
<common:organization>
<common:name>Universidad Regional Amazónica IKIAM</common:name>
<common:address>
<common:city>Tena</common:city>
<common:region>Napo</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>https://ror.org/05xedqd83</common:disambiguated-organization-identifier>
<common:disambiguation-source>ROR</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
<common:url>http://ikiam.edu.ec</common:url>
</employment:employment-summary>
</activities:affiliation-group>
</activities:employments>
<activities:fundings path="/0000-0001-5045-1000/fundings"/>
<activities:invited-positions path="/0000-0001-5045-1000/invited-positions"/>
<activities:memberships path="/0000-0001-5045-1000/memberships">
<common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
<activities:affiliation-group>
<common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
<common:external-ids/>
<membership:membership-summary put-code="19927715" display-index="1" path="/0000-0001-5045-1000/membership/19927715" visibility="public">
<common:created-date>2023-03-24T18:16:09.131Z</common:created-date>
<common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Artes Escénicas</common:department-name>
<common:role-title>Miembro</common:role-title>
<common:start-date>
<common:year>2000</common:year>
<common:month>07</common:month>
<common:day>15</common:day>
</common:start-date>
<common:organization>
<common:name>Casa de la Cultura Ecuatoriana</common:name>
<common:address>
<common:city>Riobamba</common:city>
<common:region>Sierra Centro</common:region>
<common:country>EC</common:country>
</common:address>
</common:organization>
</membership:membership-summary>
</activities:affiliation-group>
</activities:memberships>
<activities:peer-reviews path="/0000-0001-5045-1000/peer-reviews"/>
<activities:qualifications path="/0000-0001-5045-1000/qualifications">
<common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:29:11.300Z</common:last-modified-date>
<common:external-ids/>
<qualification:qualification-summary put-code="19389264" display-index="1" path="/0000-0001-5045-1000/qualification/19389264" visibility="public">
<common:created-date>2023-01-18T21:29:11.300Z</common:created-date>
<common:last-modified-date>2023-01-18T21:29:11.300Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Programa de Gobernabilidad</common:department-name>
<common:role-title>Magister en Economïa Social y Solidaria</common:role-title>
<common:start-date>
<common:year>2014</common:year>
<common:month>10</common:month>
<common:day>20</common:day>
</common:start-date>
<common:end-date>
<common:year>2017</common:year>
<common:month>01</common:month>
<common:day>26</common:day>
</common:end-date>
<common:organization>
<common:name>Instituto de Altos Estudios Nacionales</common:name>
<common:address>
<common:city>Quito</common:city>
<common:region>Pichincha</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>https://ror.org/011g3me54</common:disambiguated-organization-identifier>
<common:disambiguation-source>ROR</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</qualification:qualification-summary>
</activities:affiliation-group>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:34:32.093Z</common:last-modified-date>
<common:external-ids/>
<qualification:qualification-summary put-code="19389298" display-index="1" path="/0000-0001-5045-1000/qualification/19389298" visibility="public">
<common:created-date>2023-01-18T21:34:32.093Z</common:created-date>
<common:last-modified-date>2023-01-18T21:34:32.093Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Posgrados</common:department-name>
<common:role-title>Magister en Proyectos Sociales y Productivos</common:role-title>
<common:start-date>
<common:year>2001</common:year>
<common:month>03</common:month>
<common:day>09</common:day>
</common:start-date>
<common:end-date>
<common:year>2003</common:year>
<common:month>02</common:month>
<common:day>27</common:day>
</common:end-date>
<common:organization>
<common:name>Universidad Nacional de Chimborazo</common:name>
<common:address>
<common:city>Riobamba</common:city>
<common:region>Chimborazo</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>https://ror.org/059wmd288</common:disambiguated-organization-identifier>
<common:disambiguation-source>ROR</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</qualification:qualification-summary>
</activities:affiliation-group>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
<common:external-ids/>
<qualification:qualification-summary put-code="19389353" display-index="1" path="/0000-0001-5045-1000/qualification/19389353" visibility="public">
<common:created-date>2023-01-18T21:45:07.379Z</common:created-date>
<common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Ciencias de la Educación</common:department-name>
<common:role-title>Licenciado en Ciencias de la Educación en Castellano y Literatura</common:role-title>
<common:start-date>
<common:year>1994</common:year>
<common:month>10</common:month>
<common:day>03</common:day>
</common:start-date>
<common:end-date>
<common:year>2000</common:year>
<common:month>01</common:month>
<common:day>31</common:day>
</common:end-date>
<common:organization>
<common:name>Universidad Nacional de Chimborazo</common:name>
<common:address>
<common:city>Riobamba</common:city>
<common:region>Chimborazo</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>https://ror.org/059wmd288</common:disambiguated-organization-identifier>
<common:disambiguation-source>ROR</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</qualification:qualification-summary>
</activities:affiliation-group>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:37:42.186Z</common:last-modified-date>
<common:external-ids/>
<qualification:qualification-summary put-code="19389317" display-index="1" path="/0000-0001-5045-1000/qualification/19389317" visibility="public">
<common:created-date>2023-01-18T21:37:42.186Z</common:created-date>
<common:last-modified-date>2023-01-18T21:37:42.186Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Facultad de Artes</common:department-name>
<common:role-title>Licenciado en Artes</common:role-title>
<common:start-date>
<common:year>1989</common:year>
<common:month>09</common:month>
<common:day>05</common:day>
</common:start-date>
<common:end-date>
<common:year>1997</common:year>
<common:month>08</common:month>
<common:day>07</common:day>
</common:end-date>
<common:organization>
<common:name>Universidad Central del Ecuador</common:name>
<common:address>
<common:city>Quito</common:city>
<common:region>Pichincha</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>http://dx.doi.org/10.13039/100019134</common:disambiguated-organization-identifier>
<common:disambiguation-source>FUNDREF</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</qualification:qualification-summary>
</activities:affiliation-group>
</activities:qualifications>
<activities:research-resources path="/0000-0001-5045-1000/research-resources"/>
<activities:services path="/0000-0001-5045-1000/services"/>
<activities:works path="/0000-0001-5045-1000/works">
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
<activities:group>
<common:last-modified-date>2023-06-09T22:15:12.910Z</common:last-modified-date>
<common:external-ids/>
<work:work-summary put-code="131526645" path="/0000-0001-5045-1000/work/131526645" visibility="public" display-index="1">
<common:created-date>2023-03-24T18:36:56.180Z</common:created-date>
<common:last-modified-date>2023-06-09T22:15:12.910Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<work:title>
<common:title>Experience in a non-capitalist way: solidarity funds that do not tax interest on the use of money</common:title>
</work:title>
<common:external-ids>
<common:external-id>
<common:external-id-type>isbn</common:external-id-type>
<common:external-id-value>978-9942-29-089-2</common:external-id-value>
<common:external-id-normalized transient="true">9789942290892</common:external-id-normalized>
<common:external-id-relationship>part-of</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<work:type>book-chapter</work:type>
<common:publication-date>
<common:year>2023</common:year>
<common:month>06</common:month>
<common:day>07</common:day>
</common:publication-date>
<work:journal-title>Finanzas éticas y solidarias en América Latina: diagnósticos, debates y propuestas</work:journal-title>
</work:work-summary>
</activities:group>
<activities:group>
<common:last-modified-date>2023-03-24T19:05:36.384Z</common:last-modified-date>
<common:external-ids/>
<work:work-summary put-code="131527819" path="/0000-0001-5045-1000/work/131527819" visibility="public" display-index="1">
<common:created-date>2023-03-24T19:05:36.384Z</common:created-date>
<common:last-modified-date>2023-03-24T19:05:36.384Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<work:title>
<common:title>Incidence of artistic practices in the social transformation of the territory. study of case: Hilarte Association, Guayaquil-Ecuador</common:title>
</work:title>
<common:external-ids/>
<work:type>conference-abstract</work:type>
<common:publication-date>
<common:year>2022</common:year>
<common:month>10</common:month>
<common:day>06</common:day>
</common:publication-date>
</work:work-summary>
</activities:group>
<activities:group>
<common:last-modified-date>2023-09-04T17:40:30.215Z</common:last-modified-date>
<common:external-ids>
<common:external-id>
<common:external-id-type>other-id</common:external-id-type>
<common:external-id-value>2018</common:external-id-value>
<common:external-id-normalized transient="true">2018</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<work:work-summary put-code="141716337" path="/0000-0001-5045-1000/work/141716337" visibility="public" display-index="1">
<common:created-date>2023-09-04T17:40:30.215Z</common:created-date>
<common:last-modified-date>2023-09-04T17:40:30.215Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<work:title>
<common:title>Más allá de la transferencia de conocimientos, un espacio para el interaprendizaje y el diálogo de saberes</common:title>
</work:title>
<common:external-ids>
<common:external-id>
<common:external-id-type>other-id</common:external-id-type>
<common:external-id-value>2018</common:external-id-value>
<common:external-id-normalized transient="true">2018</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<common:url>https://drive.google.com/drive/folders/1Tclz6isxGzSjTq-hfTnxe6M1nux-88wF?usp=drive_link</common:url>
<work:type>conference-poster</work:type>
<common:publication-date>
<common:year>2018</common:year>
<common:month>11</common:month>
<common:day>30</common:day>
</common:publication-date>
<work:journal-title>Más allá de la transferencia de conocimientos, un espacio para el interaprendizaje y el diálogo de saberes</work:journal-title>
</work:work-summary>
</activities:group>
<activities:group>
<common:last-modified-date>2023-03-24T18:57:10.095Z</common:last-modified-date>
<common:external-ids/>
<work:work-summary put-code="131527433" path="/0000-0001-5045-1000/work/131527433" visibility="public" display-index="1">
<common:created-date>2023-03-24T18:57:10.095Z</common:created-date>
<common:last-modified-date>2023-03-24T18:57:10.095Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<work:title>
<common:title>Promotion of the popular and solidarity economy from the state: principles and challenges in the experience of Ecuador</common:title>
</work:title>
<common:external-ids/>
<work:type>dissertation-thesis</work:type>
<common:publication-date>
<common:year>2017</common:year>
<common:month>01</common:month>
<common:day>26</common:day>
</common:publication-date>
</work:work-summary>
</activities:group>
<activities:group>
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
<common:external-ids/>
<work:work-summary put-code="141716713" path="/0000-0001-5045-1000/work/141716713" visibility="public" display-index="1">
<common:created-date>2023-09-04T17:51:57.749Z</common:created-date>
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<work:title>
<common:title>La Rebelión de los Dioses</common:title>
</work:title>
<common:external-ids/>
<common:url>https://drive.google.com/drive/folders/1Tclz6isxGzSjTq-hfTnxe6M1nux-88wF?usp=drive_link</common:url>
<work:type>registered-copyright</work:type>
<common:publication-date>
<common:year>2001</common:year>
<common:month>08</common:month>
<common:day>28</common:day>
</common:publication-date>
<work:journal-title>Editorial pedagógica freire</work:journal-title>
</work:work-summary>
</activities:group>
</activities:works>
</activities:activities-summary>
</record:record>

View File

@ -133,32 +133,6 @@
<arg>--targetPath</arg><arg>${inputPathMAG}/dataset</arg>
<arg>--master</arg><arg>yarn-cluster</arg>
</spark>
<ok to="PreProcessORCID"/>
<error to="Kill"/>
</action>
<!-- ORCID SECTION -->
<action name="PreProcessORCID">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>Convert ORCID to Dataset</name>
<class>eu.dnetlib.doiboost.orcid.SparkPreprocessORCID</class>
<jar>dhp-doiboost-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.sql.shuffle.partitions=3840
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${inputPathOrcid}</arg>
<arg>--workingPath</arg><arg>${workingPathOrcid}</arg>
<arg>--master</arg><arg>yarn-cluster</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>

View File

@ -59,10 +59,10 @@
</property>
<!-- ORCID Parameters -->
<property>
<name>workingPathOrcid</name>
<description>the ORCID working path</description>
</property>
<!-- <property>-->
<!-- <name>workingPathOrcid</name>-->
<!-- <description>the ORCID working path</description>-->
<!-- </property>-->
</parameters>
@ -170,32 +170,6 @@
<arg>--targetPath</arg><arg>${workingPath}/uwPublication</arg>
<arg>--master</arg><arg>yarn-cluster</arg>
</spark>
<ok to="ProcessORCID"/>
<error to="Kill"/>
</action>
<!-- ORCID SECTION -->
<action name="ProcessORCID">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>Convert ORCID to Dataset</name>
<class>eu.dnetlib.doiboost.orcid.SparkConvertORCIDToOAF</class>
<jar>dhp-doiboost-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.sql.shuffle.partitions=3840
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--workingPath</arg><arg>${workingPathOrcid}</arg>
<arg>--targetPath</arg><arg>${workingPath}/orcidPublication</arg>
<arg>--master</arg><arg>yarn-cluster</arg>
</spark>
<ok to="CreateDOIBoost"/>
<error to="Kill"/>
</action>

View File

@ -66,7 +66,7 @@ object SparkGenerateDoiBoost {
Encoders.tuple(Encoders.STRING, mapEncoderPub)
implicit val mapEncoderRel: Encoder[Relation] = Encoders.kryo[Relation]
logger.info("Phase 2) Join Crossref with UnpayWall")
logger.info("Phase 1) Join Crossref with UnpayWall")
val crossrefPublication: Dataset[(String, Publication)] =
spark.read.load(s"$workingDirPath/crossrefPublication").as[Publication].map(p => (p.getId, p))
@ -91,20 +91,10 @@ object SparkGenerateDoiBoost {
.write
.mode(SaveMode.Overwrite)
.save(s"$workingDirPath/firstJoin")
logger.info("Phase 3) Join Result with ORCID")
val fj: Dataset[(String, Publication)] =
spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p))
val orcidPublication: Dataset[(String, Publication)] =
spark.read.load(s"$workingDirPath/orcidPublication").as[Publication].map(p => (p.getId, p))
fj.joinWith(orcidPublication, fj("_1").equalTo(orcidPublication("_1")), "left")
.map(applyMerge)
.write
.mode(SaveMode.Overwrite)
.save(s"$workingDirPath/secondJoin")
logger.info("Phase 4) Join Result with MAG")
logger.info("Phase 2) Join Result with MAG")
val sj: Dataset[(String, Publication)] =
spark.read.load(s"$workingDirPath/secondJoin").as[Publication].map(p => (p.getId, p))
spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p))
val magPublication: Dataset[(String, Publication)] =
spark.read.load(s"$workingDirPath/magPublication").as[Publication].map(p => (p.getId, p))

View File

@ -107,7 +107,7 @@ case object Crossref2Oaf {
.map(f => f.id)
}
def mappingResult(result: Result, json: JValue, cobjCategory: String, originalType:String): Result = {
def mappingResult(result: Result, json: JValue, cobjCategory: String, originalType: String): Result = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
//MAPPING Crossref DOI into PID
@ -372,7 +372,7 @@ case object Crossref2Oaf {
objectType,
mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type")
)
mappingResult(result, json, cOBJCategory, originalType)
mappingResult(result, json, cOBJCategory, objectSubType)
if (result == null || result.getId == null)
return List()

View File

@ -0,0 +1,26 @@
[
{
"paramName": "mt",
"paramLongName": "master",
"paramDescription": "should be local or yarn",
"paramRequired": true
},
{
"paramName": "op",
"paramLongName": "orcidPath",
"paramDescription": "the path of the orcid Table generated by the dump",
"paramRequired": true
},
{
"paramName": "gp",
"paramLongName": "graphPath",
"paramDescription": "the path of the graph we want to apply enrichment",
"paramRequired": true
},
{
"paramName": "tp",
"paramLongName": "targetPath",
"paramDescription": "the output path of the graph enriched",
"paramRequired": true
}
]

View File

@ -0,0 +1,34 @@
<configuration>
<!-- Default job properties: job tracker and name node, Oozie sharelib / classpath switches,
     and the Hive metastore, JDBC URL and database name.
     NOTE(review): the Hive endpoints are hard-coded to a specific host; presumably they are
     overridden per environment at submission time - confirm. -->
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
<property>
<name>hiveMetastoreUris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>hiveJdbcUrl</name>
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
</property>
<property>
<name>hiveDbName</name>
<value>openaire</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

View File

@ -0,0 +1,88 @@
<workflow-app name="Enrich_graph_with_ORCID_Workflow" xmlns="uri:oozie:workflow:0.5">
    <!--
        Runs the ORCID author enrichment Spark job and then copies the datasource, organization,
        project and relation folders from ${graphPath} to ${targetPath}.

        Flow: EnrichGraph -> copy_datasource -> copy_organization -> copy_project -> copy_relation -> End
        Any failing action transitions to Kill.
    -->
    <parameters>
        <property>
            <name>orcidPath</name>
            <description>the path of the orcid Table generated by the dump</description>
        </property>
        <property>
            <name>graphPath</name>
            <description>the path of the graph we want to apply enrichment</description>
        </property>
        <property>
            <name>targetPath</name>
            <description>the output path of the graph enriched</description>
        </property>
    </parameters>

    <start to="EnrichGraph"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <action name="EnrichGraph">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Enrich Graph with ORCID</name>
            <class>eu.dnetlib.dhp.enrich.orcid.SparkEnrichGraphWithOrcidAuthors</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.executor.memoryOverhead=2g
                --conf spark.sql.shuffle.partitions=3000
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--orcidPath</arg>
            <arg>${orcidPath}</arg>
            <arg>--targetPath</arg>
            <arg>${targetPath}</arg>
            <arg>--graphPath</arg>
            <arg>${graphPath}</arg>
            <arg>--master</arg>
            <arg>yarn</arg>
        </spark>
        <!-- Continue with the copy chain: transitioning straight to End would leave every copy_* action unreachable. -->
        <ok to="copy_datasource"/>
        <error to="Kill"/>
    </action>

    <action name="copy_datasource">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <arg>${nameNode}/${graphPath}/datasource</arg>
            <arg>${nameNode}/${targetPath}/datasource</arg>
        </distcp>
        <ok to="copy_organization"/>
        <error to="Kill"/>
    </action>

    <action name="copy_organization">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <arg>${nameNode}/${graphPath}/organization</arg>
            <arg>${nameNode}/${targetPath}/organization</arg>
        </distcp>
        <ok to="copy_project"/>
        <error to="Kill"/>
    </action>

    <action name="copy_project">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <arg>${nameNode}/${graphPath}/project</arg>
            <arg>${nameNode}/${targetPath}/project</arg>
        </distcp>
        <ok to="copy_relation"/>
        <error to="Kill"/>
    </action>

    <action name="copy_relation">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <arg>${nameNode}/${graphPath}/relation</arg>
            <arg>${nameNode}/${targetPath}/relation</arg>
        </distcp>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

    <end name="End"/>
</workflow-app>

View File

@ -0,0 +1,40 @@
package eu.dnetlib.dhp.enrich.orcid
import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.{Author, Publication}
import eu.dnetlib.dhp.schema.sx.OafUtils
import org.apache.spark.sql.Row
import scala.collection.JavaConverters._
/** Helpers that turn ORCID author rows into OAF [[Author]] instances. */
object AuthorEnricher extends Serializable {

  /** Builds an OAF author carrying a single ORCID pid.
    *
    * @param givenName  stored as the author's name (may be null)
    * @param familyName stored as the author's surname (may be null)
    * @param orcid      ORCID identifier, stored as the author's only pid
    * @return a new author whose pid has the ORCID_ENRICHMENT provenance action
    */
  def createAuthor(givenName: String, familyName: String, orcid: String): Author = {
    val a = new Author
    a.setName(givenName)
    a.setSurname(familyName)
    // Join only the name parts that are actually present: plain string interpolation
    // would produce full names such as "null Smith" when one part is missing.
    val fullname = Seq(givenName, familyName)
      .flatMap(part => Option(part))
      .map(_.trim)
      .filter(_.nonEmpty)
      .mkString(" ")
    a.setFullname(fullname)
    val pid = OafUtils.createSP(orcid, ModelConstants.ORCID, ModelConstants.ORCID)
    pid.setDataInfo(OafUtils.generateDataInfo())
    pid.getDataInfo.setProvenanceaction(OafUtils.createQualifier("ORCID_ENRICHMENT", "ORCID_ENRICHMENT"))
    a.setPid(List(pid).asJava)
    a
  }

  /** Converts the ORCID authors attached to a row into OAF authors.
    *
    * @param r row whose column at index 1 holds a list of structs exposing the
    *          `givenName`, `familyName` and `orcid` fields
    * @return one [[Author]] per struct, in the order found in the row
    */
  def toOAFAuthor(r: Row): java.util.List[Author] = {
    r.getList[Row](1)
      .asScala
      .map(s => createAuthor(s.getAs[String]("givenName"), s.getAs[String]("familyName"), s.getAs[String]("orcid")))
      .toList
      .asJava
  }
}

View File

@ -0,0 +1,138 @@
package eu.dnetlib.dhp.enrich.orcid
import eu.dnetlib.dhp.application.AbstractScalaApplication
import eu.dnetlib.dhp.oa.merge.AuthorMerger
import eu.dnetlib.dhp.schema.common.ModelSupport
import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Result, Software}
import org.apache.spark.sql.functions._
import org.apache.spark.sql._
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.JavaConverters._
/** Spark application that enriches the authors of the graph results with ORCID identifiers.
  *
  * It joins the ORCID `Authors` and `Works` tables, matches works to graph results through their
  * pids (schema and value compared case-insensitively) and hands the matching ORCID authors of each
  * result to `AuthorMerger.enrichOrcid`.
  */
class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String], log: Logger)
    extends AbstractScalaApplication(propertyPath, args, log: Logger) {

  /** Here all the spark applications runs this method
    * where the whole logic of the spark node is defined
    */
  override def run(): Unit = {
    val graphPath = parser.get("graphPath")
    log.info(s"graphPath is '$graphPath'")
    val orcidPath = parser.get("orcidPath")
    log.info(s"orcidPath is '$orcidPath'")
    val targetPath = parser.get("targetPath")
    log.info(s"targetPath is '$targetPath'")

    val orcidPublication: Dataset[Row] = generateOrcidTable(spark, orcidPath)

    // Review note: this could become a loop over ModelSupport.entityTypes, filtering out the
    // non-result types. Until then, a local helper keeps the four invocations uniform.
    def enrich[T <: Result](entityName: String, entityClass: Class[T]): Unit =
      enrichResult(
        spark,
        s"$graphPath/$entityName",
        orcidPublication,
        s"$targetPath/$entityName",
        Encoders.bean(entityClass)
      )

    try {
      enrich("publication", classOf[Publication])
      enrich("dataset", classOf[eu.dnetlib.dhp.schema.oaf.Dataset])
      enrich("software", classOf[Software])
      enrich("otherresearchproduct", classOf[OtherResearchProduct])
    } finally {
      // generateOrcidTable returns a cached dataset: release it once every entity type is processed.
      orcidPublication.unpersist()
    }
  }

  /** Enriches the authors of one result type and writes the outcome as gzip-compressed JSON.
    *
    * @param spark            the active Spark session
    * @param graphPath        path of the JSON entities of a single result type
    * @param orcidPublication table produced by [[generateOrcidTable]] (`schema`, `value`, `author`)
    * @param outputPath       path where the enriched entities are written (overwritten if present)
    * @param enc              encoder of the result type; its schema is used to read the JSON input
    */
  private def enrichResult[T <: Result](
    spark: SparkSession,
    graphPath: String,
    orcidPublication: Dataset[Row],
    outputPath: String,
    enc: Encoder[T]
  ): Unit = {

    // One row per (pid schema, pid value, result id), taken from the instance-level pids.
    val entities = spark.read
      .schema(enc.schema)
      .json(graphPath)
      .select(col("id"), col("datainfo"), col("instance"))
      // Review note: the `!= true` form was suggested to cope with a null datainfo.
      // NOTE(review): in SQL `NULL != true` evaluates to NULL, so rows with a null datainfo are
      // dropped from the ORCID matching here (they are still written, unenriched, by the left
      // join below) - confirm this is the intended outcome.
      .where("datainfo.deletedbyinference != true")
      .drop("datainfo")
      .withColumn("instances", explode(col("instance")))
      .withColumn("pids", explode(col("instances.pid")))
      .select(
        col("pids.qualifier.classid").alias("pid_schema"),
        col("pids.value").alias("pid_value"),
        col("id").alias("dnet_id")
      )

    // For every matched result id, the distinct set of ORCID authors of the matching works.
    val orcidDnet = orcidPublication
      .join(
        entities,
        lower(col("schema")).equalTo(lower(col("pid_schema"))) &&
        lower(col("value")).equalTo(lower(col("pid_value"))),
        "inner"
      )
      .groupBy(col("dnet_id"))
      .agg(collect_set(orcidPublication("author")).alias("orcid_authors"))
      .select("dnet_id", "orcid_authors")
      .cache()

    try {
      // The returned count is not used; presumably the action is only meant to materialise the cache.
      orcidDnet.count()

      val result = spark.read.schema(enc.schema).json(graphPath).as[T](enc)

      result
        .joinWith(orcidDnet, result("id").equalTo(orcidDnet("dnet_id")), "left")
        .map {
          // Left join: results without any ORCID match come paired with null and pass through untouched.
          case (entity, null) =>
            entity
          case (entity, orcidRow) =>
            entity.setAuthor(AuthorMerger.enrichOrcid(entity.getAuthor, AuthorEnricher.toOAFAuthor(orcidRow)))
            entity
        }(enc)
        .write
        .mode(SaveMode.Overwrite)
        .option("compression", "gzip")
        .json(outputPath)
    } finally {
      // A new dataset is cached on every call: release it so the cached data does not pile up.
      orcidDnet.unpersist()
    }
  }

  /** Builds the table linking work identifiers to ORCID authors.
    *
    * @param spark     the active Spark session
    * @param inputPath folder containing the `Authors` and `Works` ORCID tables
    * @return a cached dataset with columns `schema`, `value` and `author`
    *         (a struct with `orcid`, `givenName`, `familyName`); callers should unpersist it when done
    */
  private def generateOrcidTable(spark: SparkSession, inputPath: String): Dataset[Row] = {
    val orcidAuthors =
      spark.read.load(s"$inputPath/Authors").select("orcid", "familyName", "givenName", "creditName", "otherNames")

    // Review note: the IN (...) form replaces a longer condition, as suggested during review.
    val orcidWorks = spark.read
      .load(s"$inputPath/Works")
      .select(col("orcid"), explode(col("pids")).alias("identifier"))
      .where(
        "identifier.schema IN('doi','pmid','pmc','arxiv','handle')"
      )

    val orcidPublication = orcidAuthors
      .join(orcidWorks, orcidAuthors("orcid").equalTo(orcidWorks("orcid")))
      .select(
        col("identifier.schema").alias("schema"),
        col("identifier.value").alias("value"),
        struct(orcidAuthors("orcid").alias("orcid"), col("givenName"), col("familyName")).alias("author")
      )
    orcidPublication.cache()
  }
}
/** Command-line entry point of the ORCID enrichment job. */
object SparkEnrichGraphWithOrcidAuthors {

  val log: Logger = LoggerFactory.getLogger(SparkEnrichGraphWithOrcidAuthors.getClass)

  /** Creates the application with the parameter specification path, the raw arguments and the
    * logger, then initializes and runs it.
    *
    * @param args the command-line arguments, forwarded untouched to the application
    */
  def main(args: Array[String]): Unit = {
    val parametersResource = "/eu/dnetlib/dhp/enrich/orcid/enrich_graph_orcid_parameters.json"
    val application = new SparkEnrichGraphWithOrcidAuthors(parametersResource, args, log)
    application.initialize().run()
  }
}

View File

@ -0,0 +1,77 @@
package eu.dnetlib.dhp.enrich.orcid
import eu.dnetlib.dhp.schema.oaf.{Author, Publication}
import org.apache.spark.sql.{Column, Encoder, Encoders, Row, SparkSession}
import org.junit.jupiter.api.Test
import org.slf4j.{Logger, LoggerFactory}
import org.apache.spark.sql.functions._
/** Scratch test for the ORCID enrichment job.
  *
  * Almost the whole body is commented-out exploratory code that refers to paths on a
  * developer machine; the active code only creates a local Spark session and derives the
  * Publication bean schema.
  */
class EnrichOrcidTest {

  val log: Logger = LoggerFactory.getLogger(getClass)

  // NOTE(review): this method carries no @Test annotation, so JUnit does not execute it -
  // confirm whether that is intentional before re-enabling the commented code.
  def test(): Unit = {
    val spark = SparkSession.builder().master("local[*]").getOrCreate()
//    spark.sparkContext.setLogLevel("ERROR")

//    new SparkEnrichGraphWithOrcidAuthors(null, null, null)
//      .enrichResult(
//        spark,
//        "/Users/sandro/orcid_test/publication",
//        "",
//        "/tmp/graph/",
//        Encoders.bean(classOf[Publication])
//      )

    val schema = Encoders.bean(classOf[Publication]).schema
//
//    val simplifyAuthor = udf((r: Seq[Row]) => {
//      r
//        .map(k =>
//          AuthorPid(
//            k.getAs[String]("fullname"),
//            k.getAs[Seq[Row]]("pid")
//              .map(p => Pid(p.getAs[Row]("qualifier").getAs[String]("classid"), p.getAs[String]("value")))
//              .toList
//          )
//        )
//        .filter(l => l.pids.nonEmpty)
//        .toList
//    })
//
//    val wrong_orcid_intersection = udf((a: Seq[Row]) => {
//      a.map(author => {
//        val pids_with_orcid: Seq[Row] = author
//          .getAs[Seq[Row]]("pids")
//          .filter(p =>
//            p.getAs[String]("pidScheme") != null && p.getAs[String]("pidScheme").toLowerCase.contains("orcid")
//          )
//        if (pids_with_orcid.exists(p => p.getAs[String]("pidScheme").equals("ORCID"))) {
//          if (pids_with_orcid.map(p => p.getAs[String]("pidValue").toLowerCase).distinct.size > 1) {
//            AuthorPid(
//              author.getAs[String]("fullName"),
//              pids_with_orcid.map(p => Pid(p.getAs[String]("pidScheme"), p.getAs[String]("pidValue"))).toList
//            )
//
//          } else
//            null
//        } else
//          null
//      }).filter(author => author != null)
//    })

    // The stray expression statement `Encoders` that used to sit here did nothing and was removed.
    import spark.implicits._

//    val enriched = spark.read
//      .schema(schema)
//      .json("/Users/sandro/orcid_test/publication_enriched")
//      .select(col("id"), explode(col("author")).as("authors"))
//      .withColumn("ap", col("authors.pid.qualifier.classid"))
//      .withColumn("dp", col("authors.pid.datainfo.provenanceAction.classid"))
//
//      .show()
  }
}