Merge pull request 'ORCID Enrichment and Download' (#364) from orcid_import into beta

Reviewed-on: D-Net/dnet-hadoop#364
This commit is contained in:
Claudio Atzori 2023-12-01 15:05:44 +01:00
commit c5ac593c07
43 changed files with 3251 additions and 229 deletions

View File

@ -4,194 +4,318 @@ package eu.dnetlib.dhp.oa.merge;
import java.text.Normalizer;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import com.wcohen.ss.JaroWinkler;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.pace.model.Person;
import scala.Tuple2;
public class AuthorMerger {
private static final Double THRESHOLD = 0.95;
private static final Double THRESHOLD = 0.95;
private AuthorMerger() {
}
private AuthorMerger() {
}
public static List<Author> merge(List<List<Author>> authors) {
public static List<Author> merge(List<List<Author>> authors) {
authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)));
authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)));
List<Author> author = new ArrayList<>();
List<Author> author = new ArrayList<>();
for (List<Author> a : authors) {
author = mergeAuthor(author, a);
}
for (List<Author> a : authors) {
author = mergeAuthor(author, a);
}
return author;
return author;
}
}
public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b, Double threshold) {
int pa = countAuthorsPids(a);
int pb = countAuthorsPids(b);
List<Author> base;
List<Author> enrich;
int sa = authorsSize(a);
int sb = authorsSize(b);
public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b, Double threshold) {
int pa = countAuthorsPids(a);
int pb = countAuthorsPids(b);
List<Author> base;
List<Author> enrich;
int sa = authorsSize(a);
int sb = authorsSize(b);
if (sa == sb) {
base = pa > pb ? a : b;
enrich = pa > pb ? b : a;
} else {
base = sa > sb ? a : b;
enrich = sa > sb ? b : a;
}
enrichPidFromList(base, enrich, threshold);
return base;
}
if (sa == sb) {
base = pa > pb ? a : b;
enrich = pa > pb ? b : a;
} else {
base = sa > sb ? a : b;
enrich = sa > sb ? b : a;
}
enrichPidFromList(base, enrich, threshold);
return base;
}
public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b) {
return mergeAuthor(a, b, THRESHOLD);
}
public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b) {
return mergeAuthor(a, b, THRESHOLD);
}
private static void enrichPidFromList(List<Author> base, List<Author> enrich, Double threshold) {
if (base == null || enrich == null)
return;
private static void enrichPidFromList(List<Author> base, List<Author> enrich, Double threshold) {
if (base == null || enrich == null)
return;
// <pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list)
final Map<String, Author> basePidAuthorMap = base
.stream()
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.flatMap(
a -> a
.getPid()
.stream()
.filter(Objects::nonNull)
.map(p -> new Tuple2<>(pidToComparableString(p), a)))
.collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1));
// <pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list)
final Map<String, Author> basePidAuthorMap = base
.stream()
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.flatMap(
a -> a
.getPid()
.stream()
.filter(Objects::nonNull)
.map(p -> new Tuple2<>(pidToComparableString(p), a)))
.collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1));
// <pid, Author> (list of pid that are missing in the other list)
final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
.stream()
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.flatMap(
a -> a
.getPid()
.stream()
.filter(Objects::nonNull)
.filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p)))
.map(p -> new Tuple2<>(p, a)))
.collect(Collectors.toList());
// <pid, Author> (list of pid that are missing in the other list)
final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
.stream()
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.flatMap(
a -> a
.getPid()
.stream()
.filter(Objects::nonNull)
.filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p)))
.map(p -> new Tuple2<>(p, a)))
.collect(Collectors.toList());
pidToEnrich
.forEach(
a -> {
Optional<Tuple2<Double, Author>> simAuthor = base
.stream()
.map(ba -> new Tuple2<>(sim(ba, a._2()), ba))
.max(Comparator.comparing(Tuple2::_1));
pidToEnrich
.forEach(
a -> {
Optional<Tuple2<Double, Author>> simAuthor = base
.stream()
.map(ba -> new Tuple2<>(sim(ba, a._2()), ba))
.max(Comparator.comparing(Tuple2::_1));
if (simAuthor.isPresent()) {
double th = threshold;
// increase the threshold if the surname is too short
if (simAuthor.get()._2().getSurname() != null
&& simAuthor.get()._2().getSurname().length() <= 3 && threshold > 0.0)
th = 0.99;
if (simAuthor.isPresent()) {
double th = threshold;
// increase the threshold if the surname is too short
if (simAuthor.get()._2().getSurname() != null
&& simAuthor.get()._2().getSurname().length() <= 3 && threshold > 0.0)
th = 0.99;
if (simAuthor.get()._1() > th) {
Author r = simAuthor.get()._2();
if (r.getPid() == null) {
r.setPid(new ArrayList<>());
}
if (simAuthor.get()._1() > th) {
Author r = simAuthor.get()._2();
if (r.getPid() == null) {
r.setPid(new ArrayList<>());
}
// TERRIBLE HACK but for some reason when we create and Array with Arrays.asList,
// it creates of fixed size, and the add method raise UnsupportedOperationException at
// java.util.AbstractList.add
final List<StructuredProperty> tmp = new ArrayList<>(r.getPid());
tmp.add(a._1());
r.setPid(tmp);
}
}
});
}
// TERRIBLE HACK but for some reason when we create and Array with Arrays.asList,
// it creates of fixed size, and the add method raise UnsupportedOperationException at
// java.util.AbstractList.add
final List<StructuredProperty> tmp = new ArrayList<>(r.getPid());
tmp.add(a._1());
r.setPid(tmp);
}
}
});
}
public static String pidToComparableString(StructuredProperty pid) {
final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase()
: "";
return (pid.getQualifier() != null ? classid : "")
+ (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
}
public static String normalizeFullName(final String fullname) {
return nfd(fullname)
.toLowerCase()
// do not compact the regexes in a single expression, would cause StackOverflowError
// in case
// of large input strings
.replaceAll("(\\W)+", " ")
.replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")
.replaceAll("(\\p{Punct})+", " ")
.replaceAll("(\\d)+", " ")
.replaceAll("(\\n)+", " ")
public static int countAuthorsPids(List<Author> authors) {
if (authors == null)
return 0;
.trim();
}
return (int) authors.stream().filter(AuthorMerger::hasPid).count();
}
private static int authorsSize(List<Author> authors) {
if (authors == null)
return 0;
return authors.size();
}
private static String authorFieldToBeCompared(Author author) {
if (StringUtils.isNotBlank(author.getSurname())) {
return author.getSurname();
private static Double sim(Author a, Author b) {
}
if (StringUtils.isNotBlank(author.getFullname())) {
return author.getFullname();
}
return null;
}
final Person pa = parse(a);
final Person pb = parse(b);
/**
* This method tries to figure out when two author are the same in the contest
* of ORCID enrichment
*
* @param left Author in the OAF entity
* @param right Author ORCID
* @return based on a heuristic on the names of the authors if they are the same.
*/
public static boolean checkORCIDSimilarity(final Author left, final Author right) {
final Person pl = parse(left);
final Person pr = parse(right);
// if both are accurate (e.g. they have name and surname)
if (pa.isAccurate() & pb.isAccurate()) {
return new JaroWinkler().score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString())) * 0.5
+ new JaroWinkler().score(normalize(pa.getNameString()), normalize(pb.getNameString())) * 0.5;
} else {
return new JaroWinkler()
.score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname()));
}
}
// If one of them didn't have a surname we verify if they have the fullName not empty
// and verify if the normalized version is equal
if (!(pl.getSurname() != null && pl.getSurname().stream().anyMatch(StringUtils::isNotBlank) &&
pr.getSurname() != null && pr.getSurname().stream().anyMatch(StringUtils::isNotBlank))) {
private static boolean hasPid(Author a) {
if (a == null || a.getPid() == null || a.getPid().isEmpty())
return false;
return a.getPid().stream().anyMatch(p -> p != null && StringUtils.isNotBlank(p.getValue()));
}
if (pl.getFullname() != null && !pl.getFullname().isEmpty() && pr.getFullname() != null
&& !pr.getFullname().isEmpty()) {
return pl
.getFullname()
.stream()
.anyMatch(
fl -> pr.getFullname().stream().anyMatch(fr -> normalize(fl).equalsIgnoreCase(normalize(fr))));
} else {
return false;
}
}
// The Authors have one surname in common
if (pl.getSurname().stream().anyMatch(sl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(sl)))) {
private static Person parse(Author author) {
if (StringUtils.isNotBlank(author.getSurname())) {
return new Person(author.getSurname() + ", " + author.getName(), false);
} else {
if (StringUtils.isNotBlank(author.getFullname()))
return new Person(author.getFullname(), false);
else
return new Person("", false);
}
}
// If one of them has only a surname and is the same we can say that they are the same author
if ((pl.getName() == null || pl.getName().stream().allMatch(StringUtils::isBlank)) ||
(pr.getName() == null || pr.getName().stream().allMatch(StringUtils::isBlank)))
return true;
// The authors have the same initials of Name in common
if (pl
.getName()
.stream()
.anyMatch(
nl -> pr
.getName()
.stream()
.anyMatch(nr -> nr.equalsIgnoreCase(nl))))
return true;
}
private static String normalize(final String s) {
String[] normalized = nfd(s)
.toLowerCase()
// do not compact the regexes in a single expression, would cause StackOverflowError
// in case
// of large input strings
.replaceAll("(\\W)+", " ")
.replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")
.replaceAll("(\\p{Punct})+", " ")
.replaceAll("(\\d)+", " ")
.replaceAll("(\\n)+", " ")
.trim()
.split(" ");
// Sometimes we noticed that publication have author wrote in inverse order Surname, Name
// We verify if we have an exact match between name and surname
if (pl.getSurname().stream().anyMatch(sl -> pr.getName().stream().anyMatch(nr -> nr.equalsIgnoreCase(sl))) &&
pl.getName().stream().anyMatch(nl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(nl))))
return true;
else
return false;
}
//
Arrays.sort(normalized);
/**
* Method to enrich ORCID information in one list of authors based on another list
*
* @param baseAuthor the Author List in the OAF Entity
* @param orcidAuthor The list of ORCID Author intersected
* @return The Author List of the OAF Entity enriched with the orcid Author
*/
public static List<Author> enrichOrcid(List<Author> baseAuthor, List<Author> orcidAuthor) {
return String.join(" ", normalized);
}
if (baseAuthor == null || baseAuthor.isEmpty())
return orcidAuthor;
private static String nfd(final String s) {
return Normalizer.normalize(s, Normalizer.Form.NFD);
}
if (orcidAuthor == null || orcidAuthor.isEmpty())
return baseAuthor;
if (baseAuthor.size() == 1 && orcidAuthor.size() > 10)
return baseAuthor;
final List<Author> oAuthor = new ArrayList<>();
oAuthor.addAll(orcidAuthor);
baseAuthor.forEach(ba -> {
Optional<Author> aMatch = oAuthor.stream().filter(oa -> checkORCIDSimilarity(ba, oa)).findFirst();
if (aMatch.isPresent()) {
final Author sameAuthor = aMatch.get();
addPid(ba, sameAuthor.getPid());
oAuthor.remove(sameAuthor);
}
});
return baseAuthor;
}
/**
 * Appends the given pids to the pid list of an author.
 *
 * The author's list is replaced by a fresh {@link ArrayList} rather than mutated in place:
 * a pid list created with Arrays.asList is fixed-size and raises
 * UnsupportedOperationException on add/addAll.
 *
 * @param a the author to enrich; its pid list is (re)assigned by this method
 * @param pids the pids to append; a null list is treated as empty
 */
private static void addPid(final Author a, final List<StructuredProperty> pids) {
    final List<StructuredProperty> merged = a.getPid() == null
        ? new ArrayList<>()
        : new ArrayList<>(a.getPid());
    if (pids != null) {
        merged.addAll(pids);
    }
    a.setPid(merged);
}
/**
 * Builds a case-insensitive comparison key for a pid by concatenating the lower-cased
 * qualifier classid and the lower-cased pid value.
 *
 * A missing qualifier, classid or value contributes an empty string to the key.
 *
 * @param pid the pid to convert, must not be null
 * @return the concatenation of lower-cased classid and lower-cased value
 */
public static String pidToComparableString(StructuredProperty pid) {
    // the qualifier must be checked before it is dereferenced, otherwise a pid without qualifier raises an NPE
    final String classid = pid.getQualifier() != null && pid.getQualifier().getClassid() != null
        ? pid.getQualifier().getClassid().toLowerCase()
        : "";
    final String value = pid.getValue() != null ? pid.getValue().toLowerCase() : "";
    return classid + value;
}
/**
 * Counts how many authors in the list carry at least one pid with a non-blank value.
 *
 * @param authors the authors to inspect, may be null
 * @return the number of authors for which hasPid is true, 0 for a null list
 */
public static int countAuthorsPids(List<Author> authors) {
    int withPid = 0;
    if (authors != null) {
        for (Author candidate : authors) {
            if (hasPid(candidate)) {
                withPid++;
            }
        }
    }
    return withPid;
}
/**
 * Null-safe size of an author list.
 *
 * @param authors the list to measure, may be null
 * @return the list size, or 0 when the list is null
 */
private static int authorsSize(List<Author> authors) {
    return authors == null ? 0 : authors.size();
}
/**
 * Computes a Jaro-Winkler based similarity score between two authors.
 *
 * When both parsed persons are accurate the score is the average of the surname score
 * and the name score; otherwise the normalised full names are compared.
 *
 * @param a the first author
 * @param b the second author
 * @return the similarity score
 */
private static Double sim(Author a, Author b) {
    final Person left = parse(a);
    final Person right = parse(b);

    // both flags are always evaluated, as with the non short-circuit '&' operator
    final boolean leftAccurate = left.isAccurate();
    final boolean rightAccurate = right.isAccurate();

    if (!(leftAccurate && rightAccurate)) {
        return new JaroWinkler()
            .score(normalize(left.getNormalisedFullname()), normalize(right.getNormalisedFullname()));
    }

    final double surnameScore = new JaroWinkler()
        .score(normalize(left.getSurnameString()), normalize(right.getSurnameString()));
    final double nameScore = new JaroWinkler()
        .score(normalize(left.getNameString()), normalize(right.getNameString()));
    return surnameScore * 0.5 + nameScore * 0.5;
}
/**
 * Tells whether an author has at least one non-null pid whose value is not blank.
 *
 * @param a the author to check, may be null
 * @return true when such a pid exists, false otherwise (including null author or null/empty pid list)
 */
private static boolean hasPid(Author a) {
    if (a == null) {
        return false;
    }
    final List<StructuredProperty> pids = a.getPid();
    if (pids == null || pids.isEmpty()) {
        return false;
    }
    for (StructuredProperty pid : pids) {
        if (pid != null && StringUtils.isNotBlank(pid.getValue())) {
            return true;
        }
    }
    return false;
}
/**
 * Builds a {@link Person} from an author.
 *
 * A non-blank surname wins and is combined with the name as "surname, name";
 * otherwise the full name is used, falling back to the empty string.
 *
 * @param author the author to convert
 * @return the Person built from the best available name information
 */
private static Person parse(Author author) {
    if (StringUtils.isNotBlank(author.getSurname())) {
        final String surnameFirst = author.getSurname() + ", " + author.getName();
        return new Person(surnameFirst, false);
    }
    final String fallback = StringUtils.isNotBlank(author.getFullname()) ? author.getFullname() : "";
    return new Person(fallback, false);
}
/**
 * Normalizes a name for comparison: NFD decomposition, lower-casing, replacement of
 * non-word characters, diacritical marks, punctuation, digits and newlines with spaces,
 * then the space-separated tokens are sorted and joined back with single spaces.
 *
 * @param s the string to normalize
 * @return the sorted, space-joined tokens of the cleaned string
 */
public static String normalize(final String s) {
    // the patterns are applied one at a time on purpose: compacting them in a single
    // expression would cause StackOverflowError in case of large input strings
    final String[] patterns = {
        "(\\W)+",
        "(\\p{InCombiningDiacriticalMarks})+",
        "(\\p{Punct})+",
        "(\\d)+",
        "(\\n)+"
    };

    String cleaned = nfd(s).toLowerCase();
    for (String pattern : patterns) {
        cleaned = cleaned.replaceAll(pattern, " ");
    }

    final String[] tokens = cleaned.trim().split(" ");
    Arrays.sort(tokens);
    return String.join(" ", tokens);
}
/**
 * Applies Unicode canonical decomposition (NFD) to the given string.
 *
 * @param s the string to decompose
 * @return the NFD form of s
 */
private static String nfd(final String s) {
    final Normalizer.Form decomposition = Normalizer.Form.NFD;
    return Normalizer.normalize(s, decomposition);
}
}

View File

@ -0,0 +1,114 @@
package eu.dnetlib.oa.merge;
import static org.junit.jupiter.api.Assertions.*;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.List;
import java.util.Objects;
import org.junit.jupiter.api.Test;
import org.junit.platform.commons.util.StringUtils;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.merge.AuthorMerger;
import eu.dnetlib.dhp.schema.oaf.Author;
/**
 * Tests for the ORCID enrichment and the name-similarity heuristic of {@link AuthorMerger}.
 */
public class AuthorMergerTest {

    /**
     * Opens a classpath resource as a UTF-8 reader, failing fast when the resource is missing.
     */
    private static BufferedReader openResource(final String path) {
        return new BufferedReader(
            new InputStreamReader(
                Objects.requireNonNull(AuthorMergerTest.class.getResourceAsStream(path)),
                java.nio.charset.StandardCharsets.UTF_8));
    }

    /**
     * Counts the authors having at least one pid whose qualifier classid contains "orcid"
     * (case-insensitive). Null pids, qualifiers and classids are skipped.
     */
    private static long countAuthorsWithOrcid(final List<Author> authors) {
        return authors
            .stream()
            .filter(
                a -> a.getPid() != null && a
                    .getPid()
                    .stream()
                    .anyMatch(
                        p -> p != null
                            && p.getQualifier() != null
                            && p.getQualifier().getClassid() != null
                            && p.getQualifier().getClassid().toLowerCase().contains("orcid")))
            .count();
    }

    @Test
    public void testEnrcichAuthor() throws Exception {
        final ObjectMapper mapper = new ObjectMapper();
        final TypeReference<List<Author>> aclass = new TypeReference<List<Author>>() {
        };

        // both readers are closed even when an assertion or a parsing error interrupts the loop
        try (BufferedReader pr = openResource("/eu/dnetlib/dhp/oa/merge/authors_publication_sample.json");
            BufferedReader or = openResource("/eu/dnetlib/dhp/oa/merge/authors_orcid_sample.json")) {

            String pubLine;
            while ((pubLine = pr.readLine()) != null) {
                // per record: two lines are read from the publication sample (id, authors)
                // and up to three from the ORCID sample (matched pid, id, authors)
                final String pubId = pubLine;
                final String MatchPidOrcid = or.readLine();
                final String pubOrcid = or.readLine();
                final String data = pr.readLine();

                if (StringUtils.isNotBlank(data)) {
                    List<Author> publicationAuthors = mapper.readValue(data, aclass);
                    List<Author> orcidAuthors = mapper.readValue(or.readLine(), aclass);
                    System.out.printf("OAF ID = %s \n", pubId);
                    System.out.printf("ORCID Intersected ID = %s \n", pubOrcid);
                    System.out.printf("OAF Author Size = %d \n", publicationAuthors.size());
                    System.out.printf("Oricd Author Size = %d \n", orcidAuthors.size());
                    System.out.printf("Oricd Matched PID = %s \n", MatchPidOrcid);

                    // must be computed before the enrichment, which modifies the publication authors in place
                    long originalAuthorWithPiD = countAuthorsWithOrcid(publicationAuthors);
                    long start = System.currentTimeMillis();

                    final List<Author> enrichedList = AuthorMerger.enrichOrcid(publicationAuthors, orcidAuthors);

                    long enrichedAuthorWithPid = countAuthorsWithOrcid(enrichedList);

                    long totalTime = (System.currentTimeMillis() - start) / 1000;
                    System.out
                        .printf(
                            "Enriched authors in %d seconds from %d pid to %d pid \n", totalTime,
                            originalAuthorWithPiD,
                            enrichedAuthorWithPid);

                    // the enrichment only adds pids, it must never reduce the number of authors with an ORCID
                    assertTrue(enrichedAuthorWithPid >= originalAuthorWithPiD);

                    System.out.println("=================");
                }
            }
        }
    }

    @Test
    public void checkSimilarityTest() {
        // name and surname are swapped between the two authors: they must still be recognised as the same person
        final Author left = new Author();
        left.setName("Anand");
        left.setSurname("Rachna");
        left.setFullname("Anand, Rachna");

        System.out.println(AuthorMerger.normalizeFullName(left.getFullname()));

        final Author right = new Author();
        right.setName("Rachna");
        right.setSurname("Anand");
        right.setFullname("Rachna, Anand");

        boolean same = AuthorMerger.checkORCIDSimilarity(left, right);

        assertTrue(same);
    }
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,102 @@
package eu.dnetlib.dhp.collection.orcid;
import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.Objects;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
/**
 * Downloads the files listed by a JSON API endpoint into a target path of the given file system.
 *
 * The endpoint response is expected to contain a "files" array whose items expose a
 * "name" and a "download_url" field.
 */
public class DownloadORCIDDumpApplication {
    private static final Logger log = LoggerFactory.getLogger(DownloadORCIDDumpApplication.class);

    private final FileSystem fileSystem;

    public DownloadORCIDDumpApplication(FileSystem fileSystem) {
        this.fileSystem = fileSystem;
    }

    /**
     * Entry point: reads the "namenode", "targetPath" and "apiURL" arguments and starts the download.
     */
    public static void main(String[] args) throws Exception {
        final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(
            IOUtils
                .toString(
                    Objects
                        .requireNonNull(
                            DownloadORCIDDumpApplication.class
                                .getResourceAsStream(
                                    "/eu/dnetlib/dhp/collection/orcid/download_orcid_parameter.json"))));
        argumentParser.parseArgument(args);

        final String hdfsuri = argumentParser.get("namenode");
        log.info("hdfsURI is {}", hdfsuri);

        final String targetPath = argumentParser.get("targetPath");
        log.info("targetPath is {}", targetPath);

        final String apiURL = argumentParser.get("apiURL");
        log.info("apiURL is {}", apiURL);

        final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(hdfsuri));

        new DownloadORCIDDumpApplication(fileSystem).run(targetPath, apiURL);
    }

    /**
     * Downloads a single item and stores it as basePath/name, overwriting an existing file.
     *
     * The output file is created only when the server answers with a status code in [200, 400),
     * so a failed request does not leave an empty file behind, and the output stream is always closed.
     *
     * @param name the name of the file to create under basePath
     * @param itemURL the URL to download
     * @param basePath the target directory
     * @throws RuntimeException wrapping any failure raised while downloading or writing
     */
    private void downloadItem(final String name, final String itemURL, final String basePath) {
        final Path hdfsWritePath = new Path(String.format("%s/%s", basePath, name));
        final HttpGet request = new HttpGet(itemURL);
        final int timeout = 60; // seconds
        final RequestConfig config = RequestConfig
            .custom()
            .setConnectTimeout(timeout * 1000)
            .setConnectionRequestTimeout(timeout * 1000)
            .setSocketTimeout(timeout * 1000)
            .build();
        log.info("Downloading url {} into {}", itemURL, hdfsWritePath.getName());
        try (CloseableHttpClient client = HttpClientBuilder.create().setDefaultRequestConfig(config).build();
            CloseableHttpResponse response = client.execute(request)) {
            int responseCode = response.getStatusLine().getStatusCode();
            log.info("Response code is {}", responseCode);
            if (responseCode >= 200 && responseCode < 400 && response.getEntity() != null) {
                try (FSDataOutputStream fsDataOutputStream = fileSystem.create(hdfsWritePath, true)) {
                    IOUtils.copy(response.getEntity().getContent(), fsDataOutputStream);
                }
            } else {
                log.error("Nothing downloaded from {}: response code {}", itemURL, responseCode);
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Reads the file listing from the API and downloads every listed file into targetPath.
     *
     * @param targetPath the directory where the files are stored
     * @param apiURL the URL returning the JSON listing
     * @throws Exception when the listing cannot be retrieved or parsed, or has no "files" field
     */
    protected void run(final String targetPath, final String apiURL) throws Exception {
        final ObjectMapper mapper = new ObjectMapper();
        final URL url = new URL(apiURL);
        final URLConnection conn = url.openConnection();

        final JsonNode jsonNode;
        // parsing straight from the stream lets the JSON parser detect the encoding
        // and guarantees that the connection stream is released
        try (InputStream is = conn.getInputStream()) {
            jsonNode = mapper.readTree(is);
        }

        final JsonNode files = jsonNode != null ? jsonNode.get("files") : null;
        if (files == null) {
            throw new IllegalStateException("The response of " + apiURL + " does not contain the 'files' field");
        }

        files.forEach(i -> downloadItem(i.get("name").asText(), i.get("download_url").asText(), targetPath));
    }
}

View File

@ -0,0 +1,71 @@
package eu.dnetlib.dhp.collection.orcid;
import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
/**
 * Starts one {@link ORCIDExtractor} thread for every "tar.gz" file found in the source path
 * and waits for all of them to finish.
 */
public class ExtractORCIDDump {
    private static final Logger log = LoggerFactory.getLogger(ExtractORCIDDump.class);

    private final FileSystem fileSystem;

    public ExtractORCIDDump(FileSystem fileSystem) {
        this.fileSystem = fileSystem;
    }

    /**
     * Entry point: reads the "namenode", "sourcePath" and "targetPath" arguments and starts the extraction.
     */
    public static void main(String[] args) throws Exception {
        final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(
            IOUtils
                .toString(
                    Objects
                        .requireNonNull(
                            ExtractORCIDDump.class
                                .getResourceAsStream(
                                    "/eu/dnetlib/dhp/collection/orcid/extract_orcid_parameter.json"))));
        argumentParser.parseArgument(args);

        final String hdfsuri = argumentParser.get("namenode");
        log.info("hdfsURI is {}", hdfsuri);

        final String sourcePath = argumentParser.get("sourcePath");
        log.info("sourcePath is {}", sourcePath);

        final String targetPath = argumentParser.get("targetPath");
        log.info("targetPath is {}", targetPath);

        final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(hdfsuri));

        new ExtractORCIDDump(fileSystem).run(sourcePath, targetPath);
    }

    /**
     * Extracts every "tar.gz" file directly contained in sourcePath (no recursion), one thread per file.
     *
     * @param sourcePath the directory containing the archives
     * @param targetPath the base output path handed to every worker
     * @throws IOException when the listing fails or when at least one worker terminated with an exception
     * @throws InterruptedException when interrupted while waiting for the workers
     */
    public void run(final String sourcePath, final String targetPath) throws IOException, InterruptedException {
        RemoteIterator<LocatedFileStatus> ls = fileSystem.listFiles(new Path(sourcePath), false);
        final List<ORCIDExtractor> workers = new ArrayList<>();
        int i = 0;
        while (ls.hasNext()) {
            LocatedFileStatus current = ls.next();
            if (current.getPath().getName().endsWith("tar.gz")) {
                workers.add(new ORCIDExtractor(fileSystem, String.valueOf(i++), current.getPath(), targetPath));
            }
        }

        // an exception thrown inside a worker does not reach join(): it is collected here,
        // otherwise a failed extraction would be reported as a success
        final List<Throwable> failures = java.util.Collections.synchronizedList(new ArrayList<>());
        workers.forEach(worker -> worker.setUncaughtExceptionHandler((thread, error) -> {
            log.error("Worker {} failed", thread.getName(), error);
            failures.add(error);
        }));

        workers.forEach(Thread::start);
        for (ORCIDExtractor worker : workers) {
            worker.join();
        }

        if (!failures.isEmpty()) {
            throw new IOException(
                String.format("%d of %d extraction workers failed", failures.size(), workers.size()),
                failures.get(0));
        }
    }
}

View File

@ -0,0 +1,171 @@
package eu.dnetlib.dhp.collection.orcid;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**\
* The ORCIDExtractor class extracts ORCID data from a TAR archive.
* The class creates a map of SequenceFile.Writer objects, one for each type of data that is to be extracted (e.g., employments, works, summaries).
* Then, it iterates over the TAR archive and writes each entry to the appropriate SequenceFile.Writer object.
* Finally, it closes all the SequenceFile.Writer objects.
*/
public class ORCIDExtractor extends Thread {
private static final Logger log = LoggerFactory.getLogger(ORCIDExtractor.class);
private final FileSystem fileSystem;
private final String id;
private final Path sourcePath;
private final String baseOutputPath;
public ORCIDExtractor(FileSystem fileSystem, String id, Path sourcePath, String baseOutputPath) {
this.fileSystem = fileSystem;
this.id = id;
this.sourcePath = sourcePath;
this.baseOutputPath = baseOutputPath;
}
/**
* creates a map of SequenceFile.Writer objects,
* one for each type of data that is to be extracted. The map is created based on the filename in the TAR archive.
* For example, if the filename is employments.json, the map will contain an entry for the SequenceFile.Writer
* object that writes employment data.
* @return the Map
*/
private Map<String, SequenceFile.Writer> createMap() {
try {
log.info("Thread {} Creating sequence files starting from this input Path {}", id, sourcePath.getName());
Map<String, SequenceFile.Writer> res = new HashMap<>();
if (sourcePath.getName().contains("summaries")) {
final String summaryPath = String.format("%s/summaries_%s", baseOutputPath, id);
final SequenceFile.Writer summary_file = SequenceFile
.createWriter(
fileSystem.getConf(),
SequenceFile.Writer.file(new Path(summaryPath)),
SequenceFile.Writer.keyClass(Text.class),
SequenceFile.Writer.valueClass(Text.class));
log.info("Thread {} Creating only summary path here {}", id, summaryPath);
res.put("summary", summary_file);
return res;
} else {
String employmentsPath = String.format("%s/employments_%s", baseOutputPath, id);
final SequenceFile.Writer employments_file = SequenceFile
.createWriter(
fileSystem.getConf(),
SequenceFile.Writer.file(new Path(employmentsPath)),
SequenceFile.Writer.keyClass(Text.class),
SequenceFile.Writer.valueClass(Text.class));
res.put("employments", employments_file);
log.info("Thread {} Creating employments path here {}", id, employmentsPath);
final String worksPath = String.format("%s/works_%s", baseOutputPath, id);
final SequenceFile.Writer works_file = SequenceFile
.createWriter(
fileSystem.getConf(),
SequenceFile.Writer.file(new Path(worksPath)),
SequenceFile.Writer.keyClass(Text.class),
SequenceFile.Writer.valueClass(Text.class));
res.put("works", works_file);
log.info("Thread {} Creating works path here {}", id, worksPath);
return res;
}
} catch (Throwable e) {
throw new RuntimeException(e);
}
}
@Override
public void run() {
CompressionCodecFactory factory = new CompressionCodecFactory(fileSystem.getConf());
CompressionCodec codec = factory.getCodec(sourcePath);
if (codec == null) {
System.err.println("No codec found for " + sourcePath.getName());
System.exit(1);
}
InputStream gzipInputStream = null;
try {
gzipInputStream = codec.createInputStream(fileSystem.open(sourcePath));
final Map<String, SequenceFile.Writer> fileMap = createMap();
iterateTar(fileMap, gzipInputStream);
} catch (IOException e) {
throw new RuntimeException(e);
} finally {
log.info("Closing gzip stream");
IOUtils.closeStream(gzipInputStream);
}
}
private SequenceFile.Writer retrieveFile(Map<String, SequenceFile.Writer> fileMap, final String path) {
if (sourcePath.getName().contains("summaries")) {
return fileMap.get("summary");
}
if (path.contains("works")) {
return fileMap.get("works");
}
if (path.contains("employments"))
return fileMap.get("employments");
return null;
}
private void iterateTar(Map<String, SequenceFile.Writer> fileMap, InputStream gzipInputStream) throws IOException {
int extractedItem = 0;
try (final TarArchiveInputStream tais = new TarArchiveInputStream(gzipInputStream)) {
TarArchiveEntry entry;
while ((entry = tais.getNextTarEntry()) != null) {
if (entry.isFile()) {
final SequenceFile.Writer fl = retrieveFile(fileMap, entry.getName());
if (fl != null) {
final Text key = new Text(entry.getName());
final Text value = new Text(
org.apache.commons.io.IOUtils.toString(new BufferedReader(new InputStreamReader(tais))));
fl.append(key, value);
extractedItem++;
if (extractedItem % 100000 == 0) {
log.info("Thread {}: Extracted {} items", id, extractedItem);
break;
}
}
}
}
} finally {
for (SequenceFile.Writer k : fileMap.values()) {
log.info("Thread {}: Completed processed {} items", id, extractedItem);
k.hflush();
k.close();
}
}
}
}

View File

@ -0,0 +1,251 @@
package eu.dnetlib.dhp.collection.orcid;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.ximpleware.*;
import eu.dnetlib.dhp.collection.orcid.model.*;
import eu.dnetlib.dhp.parser.utility.VtdException;
import eu.dnetlib.dhp.parser.utility.VtdUtilityParser;
public class OrcidParser {
final Logger log = LoggerFactory.getLogger(OrcidParser.class);
// navigator and XPath evaluator of the document currently parsed; both are
// re-assigned by generateParsedDocument every time a new XML is loaded
private VTDNav vn;
private AutoPilot ap;
// XPath namespace prefixes and the corresponding namespace URIs.
// Each NS_<X> / NS_<X>_URL pair is meant to be declared together on the AutoPilot.
private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common";
private static final String NS_COMMON = "common";
private static final String NS_PERSON_URL = "http://www.orcid.org/ns/person";
private static final String NS_PERSON = "person";
private static final String NS_DETAILS_URL = "http://www.orcid.org/ns/personal-details";
private static final String NS_DETAILS = "personal-details";
private static final String NS_OTHER_URL = "http://www.orcid.org/ns/other-name";
private static final String NS_OTHER = "other-name";
private static final String NS_RECORD_URL = "http://www.orcid.org/ns/record";
private static final String NS_RECORD = "record";
private static final String NS_ERROR_URL = "http://www.orcid.org/ns/error";
private static final String NS_ACTIVITIES = "activities";
private static final String NS_ACTIVITIES_URL = "http://www.orcid.org/ns/activities";
private static final String NS_WORK = "work";
private static final String NS_WORK_URL = "http://www.orcid.org/ns/work";
// prefix paired with NS_ERROR_URL declared a few lines above
private static final String NS_ERROR = "error";
private static final String NS_HISTORY = "history";
private static final String NS_HISTORY_URL = "http://www.orcid.org/ns/history";
// NOTE(review): the bulk namespace is not declared by generateParsedDocument - confirm whether it is used elsewhere
private static final String NS_BULK_URL = "http://www.orcid.org/ns/bulk";
private static final String NS_BULK = "bulk";
private static final String NS_EXTERNAL = "external-identifier";
private static final String NS_EXTERNAL_URL = "http://www.orcid.org/ns/external-identifier";
/**
 * Parses the given XML with namespace awareness and initialises the navigator (vn) and the
 * XPath evaluator (ap) of this parser, declaring the ORCID namespace prefixes used by the XPath expressions.
 *
 * @param xml the XML document to parse
 * @throws ParseException when the document is not well formed
 */
private void generateParsedDocument(final String xml) throws ParseException {
    final VTDGen vg = new VTDGen();
    // explicit charset: the bytes handed to the parser must not depend on the platform default encoding
    // NOTE(review): assumes the documents declare (or default to) UTF-8 - confirm
    vg.setDoc(xml.getBytes(java.nio.charset.StandardCharsets.UTF_8));
    vg.parse(true);
    this.vn = vg.getNav();
    this.ap = new AutoPilot(vn);
    ap.declareXPathNameSpace(NS_COMMON, NS_COMMON_URL);
    ap.declareXPathNameSpace(NS_PERSON, NS_PERSON_URL);
    ap.declareXPathNameSpace(NS_DETAILS, NS_DETAILS_URL);
    ap.declareXPathNameSpace(NS_OTHER, NS_OTHER_URL);
    ap.declareXPathNameSpace(NS_RECORD, NS_RECORD_URL);
    ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL);
    ap.declareXPathNameSpace(NS_HISTORY, NS_HISTORY_URL);
    ap.declareXPathNameSpace(NS_WORK, NS_WORK_URL);
    ap.declareXPathNameSpace(NS_EXTERNAL, NS_EXTERNAL_URL);
    ap.declareXPathNameSpace(NS_ACTIVITIES, NS_ACTIVITIES_URL);
}
/**
 * Parses an ORCID record summary and maps it to an {@link Author}.
 * <p>
 * The ORCID iD is read from the {@code path} attribute of {@code record:record} (leading
 * character stripped); name, biography, other names and external identifiers are then
 * extracted through XPath.
 *
 * @param xml the XML of the record summary
 * @return the parsed author, or {@code null} when no {@code record:record} element is found
 *         or when an exception is raised while parsing (the failure is logged)
 */
public Author parseSummary(final String xml) {
	try {
		final Author author = new Author();
		generateParsedDocument(xml);
		List<VtdUtilityParser.Node> recordNodes = VtdUtilityParser
			.getTextValuesWithAttributes(
				ap, vn, "//record:record", Arrays.asList("path"));
		if (!recordNodes.isEmpty()) {
			final String oid = (recordNodes.get(0).getAttributes().get("path")).substring(1);
			author.setOrcid(oid);
		} else {
			return null;
		}
		List<VtdUtilityParser.Node> personNodes = VtdUtilityParser
			.getTextValuesWithAttributes(
				ap, vn, "//person:name", Arrays.asList("visibility"));
		final String visibility = (personNodes.get(0).getAttributes().get("visibility"));
		author.setVisibility(visibility);
		final String name = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:given-names");
		author.setGivenName(name);

		final String surnames = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:family-name");
		author.setFamilyName(surnames);

		final String creditNames = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:credit-name");
		author.setCreditName(creditNames);

		final String biography = VtdUtilityParser
			.getSingleValue(ap, vn, "//person:biography/personal-details:content");
		author.setBiography(biography);

		final List<String> otherNames = VtdUtilityParser.getTextValue(ap, vn, "//other-name:content");
		if (!otherNames.isEmpty()) {
			author.setOtherNames(otherNames);
		}

		// One Pid per external-identifier element: schema from external-id-type,
		// value from external-id-value.
		ap.selectXPath("//external-identifier:external-identifier");
		while (ap.evalXPath() != -1) {
			final Pid pid = new Pid();
			final AutoPilot ap1 = new AutoPilot(ap.getNav());
			ap1.selectXPath("./common:external-id-type");
			while (ap1.evalXPath() != -1) {
				int it = vn.getText();
				pid.setSchema(vn.toNormalizedString(it));
			}
			ap1.selectXPath("./common:external-id-value");
			while (ap1.evalXPath() != -1) {
				int it = vn.getText();
				pid.setValue(vn.toNormalizedString(it));
			}
			author.addOtherPid(pid);
		}
		return author;
	} catch (Exception e) {
		// Only Exceptions are treated as "bad record": JVM Errors (e.g. OutOfMemoryError)
		// must propagate instead of silently dropping records. The exception is passed as
		// last argument so that its type and stack trace are logged (getMessage() may be null).
		log.error("Error on parsing {}", xml, e);
		return null;
	}
}
/**
 * Parses an ORCID work activity and maps it to a {@link Work}.
 * <p>
 * The ORCID iD is the second {@code /}-separated segment of the {@code path} attribute of
 * {@code work:work}; every {@code common:external-id} becomes a {@link Pid} and the title is
 * read from {@code work:title/common:title}.
 *
 * @param xml the XML of the work
 * @return the parsed work, or {@code null} when no {@code work:work} element is found
 *         or when an exception is raised while parsing (the failure is logged)
 */
public Work parseWork(final String xml) {
	try {
		final Work work = new Work();
		generateParsedDocument(xml);
		List<VtdUtilityParser.Node> workNodes = VtdUtilityParser
			.getTextValuesWithAttributes(ap, vn, "//work:work", Arrays.asList("path", "visibility"));
		if (!workNodes.isEmpty()) {
			final String oid = (workNodes.get(0).getAttributes().get("path")).split("/")[1];
			work.setOrcid(oid);
		} else {
			return null;
		}

		ap.selectXPath("//common:external-id");
		while (ap.evalXPath() != -1) {
			final Pid pid = new Pid();
			final AutoPilot ap1 = new AutoPilot(ap.getNav());
			ap1.selectXPath("./common:external-id-type");
			while (ap1.evalXPath() != -1) {
				int it = vn.getText();
				pid.setSchema(vn.toNormalizedString(it));
			}
			ap1.selectXPath("./common:external-id-value");
			while (ap1.evalXPath() != -1) {
				int it = vn.getText();
				pid.setValue(vn.toNormalizedString(it));
			}
			work.addPid(pid);
		}

		work.setTitle(VtdUtilityParser.getSingleValue(ap, vn, "//work:title/common:title"));
		return work;
	} catch (Exception e) {
		// Only Exceptions are treated as "bad record": JVM Errors must propagate.
		// Passing the exception as last argument logs its type and stack trace
		// (getMessage() alone may be null).
		log.error("Error on parsing {}", xml, e);
		return null;
	}
}
/**
 * Builds a date string from the nodes selected by {@code xpath}: for every matching node the
 * text of its {@code common:year} child is appended as is, while the text of the
 * {@code common:month} and {@code common:day} children is appended with a leading "-".
 *
 * @param xpath the XPath selecting the date element(s), evaluated on the current document
 * @return the concatenated date parts, or an empty string when nothing matches
 * @throws Exception if the XPath evaluation or the navigation fails
 */
private String extractEmploymentDate(final String xpath) throws Exception {
	// Evaluated in this order; only the first part (year) has no "-" prefix.
	final String[] partPaths = {
		"./common:year", "./common:month", "./common:day"
	};
	final StringBuilder date = new StringBuilder();
	ap.selectXPath(xpath);
	while (ap.evalXPath() != -1) {
		final AutoPilot partPilot = new AutoPilot(ap.getNav());
		for (int i = 0; i < partPaths.length; i++) {
			partPilot.selectXPath(partPaths[i]);
			while (partPilot.evalXPath() != -1) {
				final int textIndex = vn.getText();
				if (i > 0) {
					date.append("-");
				}
				date.append(vn.toNormalizedString(textIndex));
			}
		}
	}
	return date.toString();
}
/**
 * Parses an ORCID employment activity and maps it to an {@link Employment}.
 * <p>
 * The ORCID iD is read from {@code common:source-orcid/common:path}. The department name
 * falls back to the organization name when {@code common:department-name} is blank. The
 * affiliation identifier is set when either the disambiguated organization identifier or
 * its source is not blank.
 *
 * @param xml the XML of the employment
 * @return the parsed employment, or {@code null} when the ORCID iD is blank
 *         or when an exception is raised while parsing (the failure is logged)
 */
public Employment parseEmployment(final String xml) {
	try {
		final Employment employment = new Employment();
		generateParsedDocument(xml);
		final String oid = VtdUtilityParser
			.getSingleValue(ap, vn, "//common:source-orcid/common:path");
		if (StringUtils.isNotBlank(oid)) {
			employment.setOrcid(oid);
		} else {
			return null;
		}
		final String depName = VtdUtilityParser
			.getSingleValue(ap, vn, "//common:department-name");
		final String rolTitle = VtdUtilityParser
			.getSingleValue(ap, vn, "//common:role-title");
		if (StringUtils.isNotBlank(rolTitle))
			employment.setRoleTitle(rolTitle);
		if (StringUtils.isNotBlank(depName))
			employment.setDepartmentName(depName);
		else
			employment
				.setDepartmentName(
					VtdUtilityParser
						.getSingleValue(ap, vn, "//common:organization/common:name"));

		employment.setStartDate(extractEmploymentDate("//common:start-date"));
		employment.setEndDate(extractEmploymentDate("//common:end-date"));

		final String affiliationId = VtdUtilityParser
			.getSingleValue(ap, vn, "//common:disambiguated-organization-identifier");
		final String affiliationIdType = VtdUtilityParser
			.getSingleValue(ap, vn, "//common:disambiguation-source");

		if (StringUtils.isNotBlank(affiliationId) || StringUtils.isNotBlank(affiliationIdType))
			employment.setAffiliationId(new Pid(affiliationId, affiliationIdType));

		return employment;
	} catch (Exception e) {
		// Only Exceptions are treated as "bad record": JVM Errors must propagate.
		// Passing the exception as last argument logs its type and stack trace
		// (getMessage() alone may be null).
		log.error("Error on parsing {}", xml, e);
		return null;
	}
}
}

View File

@ -0,0 +1,83 @@
package eu.dnetlib.dhp.collection.orcid.model;
import java.util.ArrayList;
import java.util.List;
/**
 * Bean describing an ORCID author: names, visibility, biography, alternative names and
 * additional persistent identifiers. The ORCID iD itself is inherited from {@link ORCIDItem}.
 */
public class Author extends ORCIDItem {

	private String givenName;
	private String familyName;
	private String visibility;
	private String creditName;
	private List<String> otherNames;
	private List<Pid> otherPids;
	private String biography;

	/** @return the biography, possibly {@code null} */
	public String getBiography() {
		return this.biography;
	}

	public void setBiography(final String biography) {
		this.biography = biography;
	}

	/** @return the given name, possibly {@code null} */
	public String getGivenName() {
		return this.givenName;
	}

	public void setGivenName(final String givenName) {
		this.givenName = givenName;
	}

	/** @return the family name, possibly {@code null} */
	public String getFamilyName() {
		return this.familyName;
	}

	public void setFamilyName(final String familyName) {
		this.familyName = familyName;
	}

	/** @return the credit name, possibly {@code null} */
	public String getCreditName() {
		return this.creditName;
	}

	public void setCreditName(final String creditName) {
		this.creditName = creditName;
	}

	/** @return the alternative names, or {@code null} when none were set */
	public List<String> getOtherNames() {
		return this.otherNames;
	}

	public void setOtherNames(final List<String> otherNames) {
		this.otherNames = otherNames;
	}

	/** @return the visibility value, possibly {@code null} */
	public String getVisibility() {
		return this.visibility;
	}

	public void setVisibility(final String visibility) {
		this.visibility = visibility;
	}

	/** @return the additional identifiers, or {@code null} when none were set or added */
	public List<Pid> getOtherPids() {
		return this.otherPids;
	}

	public void setOtherPids(final List<Pid> otherPids) {
		this.otherPids = otherPids;
	}

	/**
	 * Appends an identifier, creating the backing list on first use.
	 *
	 * @param pid the identifier to add
	 */
	public void addOtherPid(final Pid pid) {
		if (this.otherPids == null) {
			this.otherPids = new ArrayList<>();
		}
		this.otherPids.add(pid);
	}
}

View File

@ -0,0 +1,54 @@
package eu.dnetlib.dhp.collection.orcid.model;
/**
 * Bean describing an ORCID employment: start/end dates, affiliation identifier, department
 * name and role title. The ORCID iD itself is inherited from {@link ORCIDItem}.
 */
public class Employment extends ORCIDItem {

	private String startDate;
	// Renamed from "EndDate" to follow the lowerCamelCase convention of the other fields;
	// the accessors (and therefore the bean property name "endDate") are unchanged.
	private String endDate;
	private Pid affiliationId;
	private String departmentName;
	private String roleTitle;

	/** @return the start date, possibly {@code null} */
	public String getStartDate() {
		return startDate;
	}

	public void setStartDate(String startDate) {
		this.startDate = startDate;
	}

	/** @return the end date, possibly {@code null} */
	public String getEndDate() {
		return endDate;
	}

	public void setEndDate(String endDate) {
		this.endDate = endDate;
	}

	/** @return the identifier of the affiliation, or {@code null} when not set */
	public Pid getAffiliationId() {
		return affiliationId;
	}

	public void setAffiliationId(Pid affiliationId) {
		this.affiliationId = affiliationId;
	}

	/** @return the department name, possibly {@code null} */
	public String getDepartmentName() {
		return departmentName;
	}

	public void setDepartmentName(String departmentName) {
		this.departmentName = departmentName;
	}

	/** @return the role title, possibly {@code null} */
	public String getRoleTitle() {
		return roleTitle;
	}

	public void setRoleTitle(String roleTitle) {
		this.roleTitle = roleTitle;
	}
}

View File

@ -0,0 +1,14 @@
package eu.dnetlib.dhp.collection.orcid.model;
/**
 * Base bean of the ORCID model classes: it only carries the ORCID iD.
 */
public class ORCIDItem {

	private String orcid;

	/** @return the ORCID iD, or {@code null} when not set */
	public String getOrcid() {
		return this.orcid;
	}

	/** @param orcid the ORCID iD to store */
	public void setOrcid(final String orcid) {
		this.orcid = orcid;
	}
}

View File

@ -0,0 +1,33 @@
package eu.dnetlib.dhp.collection.orcid.model;
/**
 * Persistent identifier expressed as a (value, schema) pair.
 */
public class Pid {

	private String value;
	private String schema;

	/** Creates an identifier with both fields unset. */
	public Pid() {
	}

	/**
	 * @param value the identifier value
	 * @param schema the identifier schema
	 */
	public Pid(final String value, final String schema) {
		this.value = value;
		this.schema = schema;
	}

	/** @return the identifier value, or {@code null} when not set */
	public String getValue() {
		return this.value;
	}

	public void setValue(final String value) {
		this.value = value;
	}

	/** @return the identifier schema, or {@code null} when not set */
	public String getSchema() {
		return this.schema;
	}

	public void setSchema(final String schema) {
		this.schema = schema;
	}
}

View File

@ -0,0 +1,35 @@
package eu.dnetlib.dhp.collection.orcid.model;
import java.util.ArrayList;
import java.util.List;
/**
 * Bean describing an ORCID work: its title and persistent identifiers.
 * The ORCID iD itself is inherited from {@link ORCIDItem}.
 */
public class Work extends ORCIDItem {

	private String title;
	private List<Pid> pids;

	/** @return the title, possibly {@code null} */
	public String getTitle() {
		return this.title;
	}

	public void setTitle(final String title) {
		this.title = title;
	}

	/** @return the identifiers, or {@code null} when none were set or added */
	public List<Pid> getPids() {
		return this.pids;
	}

	public void setPids(final List<Pid> pids) {
		this.pids = pids;
	}

	/**
	 * Appends an identifier, creating the backing list on first use.
	 *
	 * @param pid the identifier to add
	 */
	public void addPid(final Pid pid) {
		if (this.pids == null) {
			this.pids = new ArrayList<>();
		}
		this.pids.add(pid);
	}
}

View File

@ -0,0 +1,21 @@
[
{
"paramName": "n",
"paramLongName": "namenode",
"paramDescription": "the Name Node URI",
"paramRequired": true
},
{
"paramName": "t",
"paramLongName": "targetPath",
"paramDescription": "the target PATH where the files are downloaded",
"paramRequired": true
},
{
"paramName": "a",
"paramLongName": "apiURL",
"paramDescription": "the FIGSHARE API id URL to retrieve all the dump files",
"paramRequired": true
}
]

View File

@ -0,0 +1,21 @@
[
{
"paramName": "n",
"paramLongName": "namenode",
"paramDescription": "the Name Node URI",
"paramRequired": true
},
{
"paramName": "t",
"paramLongName": "targetPath",
"paramDescription": "the target PATH to extract files",
"paramRequired": true
},
{
"paramName": "s",
"paramLongName": "sourcePath",
"paramDescription": "the PATH where the tar.gz files were downloaded",
"paramRequired": true
}
]

View File

@ -0,0 +1,21 @@
[
{
"paramName": "m",
"paramLongName": "master",
"paramDescription": "the master name",
"paramRequired": true
},
{
"paramName": "t",
"paramLongName": "targetPath",
"paramDescription": "the target PATH of the DF tables",
"paramRequired": true
},
{
"paramName": "s",
"paramLongName": "sourcePath",
"paramDescription": "the PATH of the ORCID sequence file",
"paramRequired": true
}
]

View File

@ -0,0 +1,23 @@
<!-- Job properties: job tracker and name node addresses, use of the Oozie system
     libpath with the spark2 sharelib for Spark actions, and user classpath
     precedence for the launcher. -->
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

View File

@ -0,0 +1,81 @@
<workflow-app name="download_ORCID_DUMP" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>targetPath</name>
<description>the path to store the original ORCID dump</description>
</property>
<property>
<name>apiURL</name>
<description>The figshare API URL to retrieve the list of files to download</description>
</property>
</parameters>
<!-- Entry point: begin with the download so that the whole chain
     DownloadDUMP -> extractDump -> generateTables is executed. Starting at
     generateTables leaves the first two actions unreachable and the apiURL
     parameter unused. -->
<start to="DownloadDUMP"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<!-- Java action running DownloadORCIDDumpApplication with the name node, the target
     path and the API URL; on success the flow continues with extractDump. -->
<action name="DownloadDUMP">
<java>
<configuration>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>
<main-class>eu.dnetlib.dhp.collection.orcid.DownloadORCIDDumpApplication</main-class>
<arg>--namenode</arg><arg>${nameNode}</arg>
<arg>--targetPath</arg><arg>${targetPath}</arg>
<arg>--apiURL</arg><arg>${apiURL}</arg>
</java>
<ok to="extractDump"/>
<error to="Kill"/>
</action>
<!-- Java action running ExtractORCIDDump (JVM started with -Xmx6g): it reads from
     ${targetPath} and writes to ${targetPath}/extracted; on success the flow
     continues with generateTables. -->
<action name="extractDump">
<java>
<configuration>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>
<main-class>eu.dnetlib.dhp.collection.orcid.ExtractORCIDDump</main-class>
<java-opts> -Xmx6g </java-opts>
<arg>--namenode</arg><arg>${nameNode}</arg>
<arg>--sourcePath</arg><arg>${targetPath}</arg>
<arg>--targetPath</arg><arg>${targetPath}/extracted</arg>
</java>
<ok to="generateTables"/>
<error to="Kill"/>
</action>
<!-- Spark action (yarn, cluster mode) running SparkGenerateORCIDTable: it reads
     ${targetPath}/extracted and writes the tables under ${targetPath}/tables;
     on success the workflow ends. -->
<action name="generateTables">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Generate ORCID Tables</name>
<class>eu.dnetlib.dhp.collection.orcid.SparkGenerateORCIDTable</class>
<jar>dhp-aggregation-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.executor.memoryOverhead=2g
--conf spark.sql.shuffle.partitions=3000
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${targetPath}/extracted</arg>
<arg>--targetPath</arg><arg>${targetPath}/tables</arg>
<arg>--master</arg><arg>yarn</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -0,0 +1,21 @@
[
{
"paramName": "n",
"paramLongName": "namenode",
"paramDescription": "the Name Node URI",
"paramRequired": true
},
{
"paramName": "t",
"paramLongName": "targetPath",
"paramDescription": "the target PATH where the files are downloaded",
"paramRequired": true
},
{
"paramName": "a",
"paramLongName": "apiURL",
"paramDescription": "the FIGSHARE API id URL to retrieve all the dump files",
"paramRequired": true
}
]

View File

@ -0,0 +1,101 @@
package eu.dnetlib.dhp.collection.orcid
import eu.dnetlib.dhp.application.AbstractScalaApplication
import eu.dnetlib.dhp.collection.orcid.model.{Author, Employment, Pid, Work}
import org.apache.hadoop.io.Text
import org.apache.spark.SparkContext
import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
import org.slf4j.{Logger, LoggerFactory}
/** Spark application that turns the ORCID sequence file into the
  * `Authors`, `Employments` and `Works` tables.
  *
  * @param propertyPath classpath location of the JSON describing the accepted parameters
  * @param args         the command line arguments
  * @param log          the logger used by the application
  */
class SparkGenerateORCIDTable(propertyPath: String, args: Array[String], log: Logger)
    extends AbstractScalaApplication(propertyPath, args, log: Logger) {

  /** Reads `sourcePath` and `targetPath` from the parsed arguments and generates the
    * Authors, Employments and Works tables, in that order.
    */
  override def run(): Unit = {
    val sourcePath: String = parser.get("sourcePath")
    log.info("found parameters sourcePath: {}", sourcePath)
    val targetPath: String = parser.get("targetPath")
    log.info("found parameters targetPath: {}", targetPath)
    extractORCIDTable(spark, sourcePath, targetPath)
    extractORCIDEmploymentsTable(spark, sourcePath, targetPath)
    extractORCIDWorksTable(spark, sourcePath, targetPath)
  }

  /** Loads the (Text, Text) sequence file at `sourcePath` as a dataset of string pairs. */
  private def readRecords(
    spark: SparkSession,
    sourcePath: String
  ): org.apache.spark.sql.Dataset[(String, String)] = {
    import spark.implicits._
    spark.sparkContext
      .sequenceFile(sourcePath, classOf[Text], classOf[Text])
      .map { case (key, value) => (key.toString, value.toString) }
      .toDS()
  }

  /** Shared pipeline of the three tables: keeps the records whose key contains
    * `keyFragment`, parses each value with a fresh [[OrcidParser]] and overwrites
    * `outputPath` with the successfully parsed items.
    *
    * The parser returns `null` on failure: such results are dropped inside `flatMap`,
    * so a `null` is never handed to the bean encoder.
    */
  private def writeTable[T](
    spark: SparkSession,
    sourcePath: String,
    keyFragment: String,
    outputPath: String
  )(parse: (OrcidParser, String) => T)(implicit encoder: Encoder[T]): Unit =
    readRecords(spark, sourcePath)
      .filter(r => r._1.contains(keyFragment))
      .flatMap(r => Option(parse(new OrcidParser, r._2)).toList)
      .write
      .mode(SaveMode.Overwrite)
      .save(outputPath)

  /** Writes to `targetPath/Authors` the authors parsed from the records whose key contains "summaries". */
  def extractORCIDTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
    implicit val authorEncoder: Encoder[Author] = Encoders.bean(classOf[Author])
    writeTable(spark, sourcePath, "summaries", s"$targetPath/Authors")((p, xml) => p.parseSummary(xml))
  }

  /** Writes to `targetPath/Works` the works parsed from the records whose key contains "works". */
  def extractORCIDWorksTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
    implicit val workEncoder: Encoder[Work] = Encoders.bean(classOf[Work])
    writeTable(spark, sourcePath, "works", s"$targetPath/Works")((p, xml) => p.parseWork(xml))
  }

  /** Writes to `targetPath/Employments` the employments parsed from the records whose key contains "employments". */
  def extractORCIDEmploymentsTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
    implicit val employmentEncoder: Encoder[Employment] = Encoders.bean(classOf[Employment])
    writeTable(spark, sourcePath, "employments", s"$targetPath/Employments")((p, xml) => p.parseEmployment(xml))
  }
}
/** Entry point of the job generating the ORCID tables. */
object SparkGenerateORCIDTable {

  val log: Logger = LoggerFactory.getLogger(SparkGenerateORCIDTable.getClass)

  /** Builds the application from the command line arguments, initializes it and runs it. */
  def main(args: Array[String]): Unit = {
    val parameterFile = "/eu/dnetlib/dhp/collection/orcid/generate_orcid_table_parameter.json"
    val application = new SparkGenerateORCIDTable(parameterFile, args, log)
    application.initialize().run()
  }
}

View File

@ -176,7 +176,8 @@ object DataciteToOAFTransformation {
vocabularies.getSynonymAsQualifier(
ModelConstants.DNET_RESULT_TYPOLOGIES,
typeQualifier.getClassid
), resourceType
),
resourceType
)
}
if (schemaOrg != null && schemaOrg.nonEmpty) {
@ -188,7 +189,8 @@ object DataciteToOAFTransformation {
vocabularies.getSynonymAsQualifier(
ModelConstants.DNET_RESULT_TYPOLOGIES,
typeQualifier.getClassid
), schemaOrg
),
schemaOrg
)
}
@ -203,7 +205,8 @@ object DataciteToOAFTransformation {
vocabularies.getSynonymAsQualifier(
ModelConstants.DNET_RESULT_TYPOLOGIES,
typeQualifier.getClassid
), resourceTypeGeneral
),
resourceTypeGeneral
)
}
@ -228,7 +231,6 @@ object DataciteToOAFTransformation {
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
i.setInstanceTypeMapping(List(itm).asJava)
typeQualifiers._2.getClassname match {
case "dataset" =>
val r = new OafDataset

View File

@ -593,7 +593,6 @@ object BioDBToOAF {
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
i.setInstanceTypeMapping(List(itm).asJava)
i.setCollectedfrom(collectedFromMap("ebi"))
d.setInstance(List(i).asJava)
i.setDateofacceptance(

View File

@ -195,7 +195,7 @@ object PubMedToOaf {
pubmedInstance.setInstanceTypeMapping(List(itm).asJava)
} else {
val i_type = article.getPublicationTypes.asScala
.map(s => (s.getValue,getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)))
.map(s => (s.getValue, getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)))
.find(q => q._2 != null)
if (i_type.isDefined) {
@ -205,8 +205,7 @@ object PubMedToOaf {
itm.setOriginalType(i_type.get._1)
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
pubmedInstance.setInstanceTypeMapping(List(itm).asJava)
}
else
} else
return null
}
val result = createResult(pubmedInstance.getInstancetype, vocabularies)

View File

@ -0,0 +1,119 @@
package eu.dnetlib.dhp.collection.orcid;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.Text;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.ximpleware.NavException;
import com.ximpleware.ParseException;
import com.ximpleware.XPathEvalException;
import com.ximpleware.XPathParseException;
import eu.dnetlib.dhp.collection.orcid.model.Author;
import eu.dnetlib.dhp.collection.orcid.model.ORCIDItem;
import eu.dnetlib.dhp.parser.utility.VtdException;
/**
 * Smoke tests for {@link OrcidParser}: each test parses the XML fixtures found on the
 * classpath and prints the JSON serialization of the result.
 * NOTE(review): no assertion is made on the parsed content; the tests only fail when
 * reading, parsing or serializing throws.
 */
public class DownloadORCIDTest {
	private final Logger log = LoggerFactory.getLogger(DownloadORCIDTest.class);

	/**
	 * Reads a classpath resource as UTF-8 text. The stream is closed after reading and the
	 * charset is explicit, instead of relying on the platform default.
	 *
	 * @param path the classpath location of the resource
	 * @return the content of the resource
	 */
	private String readResource(final String path) {
		try (java.io.InputStream is = Objects
			.requireNonNull(getClass().getResourceAsStream(path), "missing test resource: " + path)) {
			return IOUtils.toString(is, java.nio.charset.StandardCharsets.UTF_8);
		} catch (IOException e) {
			throw new RuntimeException(e);
		}
	}

	@Test
	public void testSummary() throws Exception {
		final String xml = readResource("/eu/dnetlib/dhp/collection/orcid/summary.xml");

		final OrcidParser parser = new OrcidParser();
		ORCIDItem orcidItem = parser.parseSummary(xml);

		final ObjectMapper mapper = new ObjectMapper();
		System.out.println(mapper.writeValueAsString(orcidItem));
	}

	@Test
	public void testParsingWork() throws Exception {
		final List<String> worksPath = Arrays
			.asList(
				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-2536-4498.xml",
				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-5982-8983.xml",
				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191.xml",
				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191-similarity.xml",
				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191_contributors.xml");

		final OrcidParser parser = new OrcidParser();
		final ObjectMapper mapper = new ObjectMapper();

		worksPath.stream().map(this::readResource).forEach(s -> {
			try {
				System.out.println(mapper.writeValueAsString(parser.parseWork(s)));
			} catch (Exception e) {
				throw new RuntimeException(e);
			}
		});
	}

	@Test
	public void testParsingEmployments() throws Exception {
		final List<String> employmentsPath = Arrays
			.asList(
				"/eu/dnetlib/dhp/collection/orcid/employment.xml",
				"/eu/dnetlib/dhp/collection/orcid/employment_2.xml",
				"/eu/dnetlib/dhp/collection/orcid/employment_3.xml");

		final OrcidParser parser = new OrcidParser();
		final ObjectMapper mapper = new ObjectMapper();

		employmentsPath.stream().map(this::readResource).forEach(s -> {
			try {
				System.out.println(mapper.writeValueAsString(parser.parseEmployment(s)));
			} catch (Exception e) {
				throw new RuntimeException(e);
			}
		});
	}
}

View File

@ -0,0 +1,69 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<work:work xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history" xmlns:employment="http://www.orcid.org/ns/employment" xmlns:education="http://www.orcid.org/ns/education" xmlns:other-name="http://www.orcid.org/ns/other-name" xmlns:deprecated="http://www.orcid.org/ns/deprecated" xmlns:funding="http://www.orcid.org/ns/funding" xmlns:research-resource="http://www.orcid.org/ns/research-resource" xmlns:service="http://www.orcid.org/ns/service" xmlns:researcher-url="http://www.orcid.org/ns/researcher-url" xmlns:distinction="http://www.orcid.org/ns/distinction" xmlns:internal="http://www.orcid.org/ns/internal" xmlns:membership="http://www.orcid.org/ns/membership" xmlns:person="http://www.orcid.org/ns/person" xmlns:personal-details="http://www.orcid.org/ns/personal-details" xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common" xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword" xmlns:activities="http://www.orcid.org/ns/activities" xmlns:qualification="http://www.orcid.org/ns/qualification" xmlns:external-identifier="http://www.orcid.org/ns/external-identifier" xmlns:error="http://www.orcid.org/ns/error" xmlns:preferences="http://www.orcid.org/ns/preferences" xmlns:invited-position="http://www.orcid.org/ns/invited-position" xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="26448226" path="/0000-0001-5010-5001/work/26448226" visibility="public">
<common:created-date>2016-09-01T19:22:46.768Z</common:created-date>
<common:last-modified-date>2022-05-25T03:48:56.968Z</common:last-modified-date>
<common:source>
<common:source-client-id>
<common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
<common:path>0000-0002-5982-8983</common:path>
<common:host>orcid.org</common:host>
</common:source-client-id>
<common:source-name>Scopus - Elsevier</common:source-name>
<common:assertion-origin-orcid>
<common:uri>https://orcid.org/0000-0001-5010-5001</common:uri>
<common:path>0000-0001-5010-5001</common:path>
<common:host>orcid.org</common:host>
</common:assertion-origin-orcid>
<common:assertion-origin-name>Quang Nguyen</common:assertion-origin-name>
</common:source>
<work:title>
<common:title>Vision outcomes and major complications after endovascular coil embolization of ophthalmic segment aneurysms</common:title>
</work:title>
<work:journal-title>American Journal of Neuroradiology</work:journal-title>
<work:citation>
<work:citation-type>bibtex</work:citation-type>
<work:citation-value>@article{Nguyen2014,title = {Vision outcomes and major complications after endovascular coil embolization of ophthalmic segment aneurysms},journal = {American Journal of Neuroradiology},year = {2014},volume = {35},number = {11},pages = {2140-2145},author = {Durst, C. and Starke, R.M. and Gaughen, J. and Nguyen, Q. and Patrie, J. and Jensen, M.E. and Evans, A.J.}}</work:citation-value>
</work:citation>
<work:type>journal-article</work:type>
<common:publication-date>
<common:year>2014</common:year>
</common:publication-date>
<common:external-ids>
<common:external-id>
<common:external-id-type>doi</common:external-id-type>
<common:external-id-value>10.3174/ajnr.A4032</common:external-id-value>
<common:external-id-normalized transient="true">10.3174/ajnr.a4032</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
<common:external-id>
<common:external-id-type>eid</common:external-id-type>
<common:external-id-value>2-s2.0-84911865199</common:external-id-value>
<common:external-id-normalized transient="true">2-s2.0-84911865199</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<common:url>http://www.scopus.com/inward/record.url?eid=2-s2.0-84911865199&amp;partnerID=MN8TOARS</common:url>
<work:contributors>
<work:contributor>
<work:credit-name>Durst, C.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Starke, R.M.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Gaughen, J.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Nguyen, Q.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Patrie, J.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Jensen, M.E.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Evans, A.J.</work:credit-name>
</work:contributor>
</work:contributors>
</work:work>

View File

@ -0,0 +1,79 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<work:work xmlns:address="http://www.orcid.org/ns/address"
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work"
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="50101152"
path="/0000-0001-5349-4030/work/50101152" visibility="public">
<common:created-date>2018-11-01T19:49:45.562Z</common:created-date>
<common:last-modified-date>2018-11-01T19:49:45.562Z</common:last-modified-date>
<common:source>
<common:source-client-id>
<common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
<common:path>0000-0002-5982-8983</common:path>
<common:host>orcid.org</common:host>
</common:source-client-id>
<common:source-name>Scopus - Elsevier</common:source-name>
</common:source>
<work:title>
<common:title>"Calling Out" in class: Degrees of candor in addressing social injustices in
racially homogenous and heterogeneous U.S. history classrooms</common:title>
</work:title>
<work:journal-title>Journal of Social Studies Research</work:journal-title>
<work:citation>
<work:citation-type>bibtex</work:citation-type>
<work:citation-value>@article{Massaro2018,title = {{"}Calling Out{"} in class: Degrees of
candor in addressing social injustices in racially homogenous and heterogeneous U.S.
history classrooms},journal = {Journal of Social Studies Research},year = {2018},author
= {Parkhouse, H. and Massaro, V.R.}}</work:citation-value>
</work:citation>
<work:type>journal-article</work:type>
<common:publication-date>
<common:year>2018</common:year>
</common:publication-date>
<common:external-ids>
<common:external-id>
<common:external-id-type>doi</common:external-id-type>
<common:external-id-value>10.1016/j.jssr.2018.01.004</common:external-id-value>
<common:external-id-normalized transient="true"
>10.1016/j.jssr.2018.01.004</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
<common:external-id>
<common:external-id-type>eid</common:external-id-type>
<common:external-id-value>2-s2.0-85041949043</common:external-id-value>
<common:external-id-normalized transient="true"
>2-s2.0-85041949043</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<common:url>http://www.scopus.com/inward/record.url?eid=2-s2.0-85041949043&amp;partnerID=MN8TOARS</common:url>
<work:contributors>
<work:contributor>
<work:credit-name>Parkhouse, H.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Massaro, V.R.</work:credit-name>
</work:contributor>
</work:contributors>
</work:work>

View File

@ -0,0 +1,113 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<work:work xmlns:address="http://www.orcid.org/ns/address"
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work"
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
path="/0000-0003-2760-1191/work/28776099" visibility="public">
<common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
<common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
<common:path>0000-0002-9157-3431</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Europe PubMed Central</common:source-name>
</common:source>
<work:title>
<common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
ST-Segment-Elevation Myocardial Infarction.</common:title>
</work:title>
<work:citation>
<work:citation-type>formatted-unspecified</work:citation-type>
<work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
</work:citation>
<work:type>journal-article</work:type>
<common:publication-date>
<common:year>2016</common:year>
<common:month>11</common:month>
</common:publication-date>
<common:external-ids>
<common:external-id>
<common:external-id-type>pmid</common:external-id-type>
<common:external-id-value>27899851</common:external-id-value>
<common:external-id-normalized transient="true">27899851</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
<common:external-id>
<common:external-id-type>pmc</common:external-id-type>
<common:external-id-value>PMC5126442</common:external-id-value>
<common:external-id-normalized transient="true"
>PMC5126442</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<common:url>http://europepmc.org/abstract/med/27899851</common:url>
<work:contributors>
<work:contributor>
<work:credit-name>Abdel-Dayem K</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Abdel-Dayem Fake</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Eweda II</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>El-Sherbiny A</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Dimitry MO</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Nammas W</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
</work:contributors>
</work:work>

View File

@ -0,0 +1,106 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<work:work xmlns:address="http://www.orcid.org/ns/address"
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work"
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
path="/0000-0003-2760-1191/work/28776099" visibility="public">
<common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
<common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
<common:path>0000-0002-9157-3431</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Europe PubMed Central</common:source-name>
</common:source>
<work:title>
<common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
ST-Segment-Elevation Myocardial Infarction.</common:title>
</work:title>
<work:citation>
<work:citation-type>formatted-unspecified</work:citation-type>
<work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
</work:citation>
<work:type>journal-article</work:type>
<common:publication-date>
<common:year>2016</common:year>
<common:month>11</common:month>
</common:publication-date>
<common:external-ids>
<common:external-id>
<common:external-id-type>pmid</common:external-id-type>
<common:external-id-value>27899851</common:external-id-value>
<common:external-id-normalized transient="true">27899851</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
<common:external-id>
<common:external-id-type>pmc</common:external-id-type>
<common:external-id-value>PMC5126442</common:external-id-value>
<common:external-id-normalized transient="true"
>PMC5126442</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<common:url>http://europepmc.org/abstract/med/27899851</common:url>
<work:contributors>
<work:contributor>
<work:credit-name>Khair Abde Daye</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Eweda II</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>El-Sherbiny A</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Dimitry MO</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Nammas W</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
</work:contributors>
</work:work>

View File

@ -0,0 +1,101 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<work:work xmlns:address="http://www.orcid.org/ns/address"
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work"
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
path="/0000-0003-2760-1191/work/28776099" visibility="public">
<common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
<common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
<common:path>0000-0002-9157-3431</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Europe PubMed Central</common:source-name>
</common:source>
<work:title>
<common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
ST-Segment-Elevation Myocardial Infarction.</common:title>
</work:title>
<work:citation>
<work:citation-type>formatted-unspecified</work:citation-type>
<work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
</work:citation>
<work:type>journal-article</work:type>
<common:publication-date>
<common:year>2016</common:year>
<common:month>11</common:month>
</common:publication-date>
<common:external-ids>
<common:external-id>
<common:external-id-type>pmid</common:external-id-type>
<common:external-id-value>27899851</common:external-id-value>
<common:external-id-normalized transient="true">27899851</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
<common:external-id>
<common:external-id-type>pmc</common:external-id-type>
<common:external-id-value>PMC5126442</common:external-id-value>
<common:external-id-normalized transient="true"
>PMC5126442</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<common:url>http://europepmc.org/abstract/med/27899851</common:url>
<work:contributors>
<work:contributor>
<work:contributor-attributes>
<work:contributor-sequence>seq0</work:contributor-sequence>
<work:contributor-role>role0</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>creditname1</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>creditname2</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>seq2</work:contributor-sequence>
<work:contributor-role></work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>creditname3</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence></work:contributor-sequence>
<work:contributor-role>role3</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name></work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>seq4</work:contributor-sequence>
<work:contributor-role>role4</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
</work:contributors>
</work:work>

View File

@ -0,0 +1,50 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
put-code="2205087" path="/0000-0001-5010-5001/employment/2205087" display-index="0"
visibility="public">
<common:created-date>2016-09-01T19:21:05.791Z</common:created-date>
<common:last-modified-date>2016-09-01T19:21:05.791Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5010-5001</common:uri>
<common:path>0000-0001-5010-5001</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Quang Nguyen</common:source-name>
</common:source>
<common:organization>
<common:name>Beth Israel Deaconess Medical Center</common:name>
<common:address>
<common:city>Boston</common:city>
<common:region>MA</common:region>
<common:country>US</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>1859</common:disambiguated-organization-identifier>
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</employment:employment>

View File

@ -0,0 +1,55 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
put-code="6364960" path="/0000-0001-5011-3001/employment/6364960" display-index="1"
visibility="public">
<common:created-date>2018-09-03T01:46:19.474Z</common:created-date>
<common:last-modified-date>2018-09-03T01:46:19.474Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5011-3001</common:uri>
<common:path>0000-0001-5011-3001</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>zhengyan li</common:source-name>
</common:source>
<common:start-date>
<common:year>2008</common:year>
<common:month>09</common:month>
<common:day>01</common:day>
</common:start-date>
<common:organization>
<common:name>Anhui Academy of Agricultural Sciences</common:name>
<common:address>
<common:city>Hefei</common:city>
<common:region>Anhui</common:region>
<common:country>CN</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>125385</common:disambiguated-organization-identifier>
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</employment:employment>

View File

@ -0,0 +1,62 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
put-code="7210424" path="/0000-0001-5022-8001/employment/7210424" display-index="1"
visibility="public">
<common:created-date>2021-03-11T14:48:29.603Z</common:created-date>
<common:last-modified-date>2021-03-11T14:48:29.603Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5012-1001</common:uri>
<common:path>0000-0001-5012-1001</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Asma Bazzi</common:source-name>
</common:source>
<common:department-name>Pathology and Laboratory Medicine</common:department-name>
<common:role-title>Medical Laboratory Technologist</common:role-title>
<common:start-date>
<common:year>1994</common:year>
<common:month>10</common:month>
<common:day>01</common:day>
</common:start-date>
<common:end-date>
<common:year>2000</common:year>
<common:month>06</common:month>
<common:day>30</common:day>
</common:end-date>
<common:organization>
<common:name>American University of Beirut</common:name>
<common:address>
<common:city>Hamra</common:city>
<common:region>Beirut</common:region>
<common:country>LB</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>11238</common:disambiguated-organization-identifier>
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</employment:employment>

View File

@ -0,0 +1,581 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<record:record xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history" xmlns:employment="http://www.orcid.org/ns/employment" xmlns:education="http://www.orcid.org/ns/education" xmlns:other-name="http://www.orcid.org/ns/other-name" xmlns:deprecated="http://www.orcid.org/ns/deprecated" xmlns:funding="http://www.orcid.org/ns/funding" xmlns:research-resource="http://www.orcid.org/ns/research-resource" xmlns:service="http://www.orcid.org/ns/service" xmlns:researcher-url="http://www.orcid.org/ns/researcher-url" xmlns:distinction="http://www.orcid.org/ns/distinction" xmlns:internal="http://www.orcid.org/ns/internal" xmlns:membership="http://www.orcid.org/ns/membership" xmlns:person="http://www.orcid.org/ns/person" xmlns:personal-details="http://www.orcid.org/ns/personal-details" xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common" xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword" xmlns:activities="http://www.orcid.org/ns/activities" xmlns:qualification="http://www.orcid.org/ns/qualification" xmlns:external-identifier="http://www.orcid.org/ns/external-identifier" xmlns:error="http://www.orcid.org/ns/error" xmlns:preferences="http://www.orcid.org/ns/preferences" xmlns:invited-position="http://www.orcid.org/ns/invited-position" xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review" path="/0000-0001-5045-1000">
<common:orcid-identifier>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:orcid-identifier>
<preferences:preferences>
<preferences:locale>es</preferences:locale>
</preferences:preferences>
<history:history>
<history:creation-method>Direct</history:creation-method>
<history:submission-date>2023-01-17T23:50:40.215Z</history:submission-date>
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
<history:claimed>true</history:claimed>
<history:verified-email>true</history:verified-email>
<history:verified-primary-email>true</history:verified-primary-email>
</history:history>
<person:person path="/0000-0001-5045-1000/person">
<person:name visibility="public" path="0000-0001-5045-1000">
<common:created-date>2023-01-17T23:50:40.472Z</common:created-date>
<common:last-modified-date>2023-01-17T23:50:40.472Z</common:last-modified-date>
<personal-details:given-names>Patricio</personal-details:given-names>
<personal-details:family-name>Sánchez Quinchuela</personal-details:family-name>
</person:name>
<other-name:other-names path="/0000-0001-5045-1000/other-names"/>
<person:biography visibility="public" path="/0000-0001-5045-1000/biography">
<common:created-date>2023-01-19T13:47:33.653Z</common:created-date>
<common:last-modified-date>2023-01-19T13:47:33.653Z</common:last-modified-date>
<personal-details:content>Especialista de vinculación con la sociedad y docente de la Universidad de las Artes. Magister en Economía Social y Solidaria por el IAEN; Magister en Proyectos Sociales y Productivos por la UNACH. Licenciado en Artes UCE. Licenciado en Castellano y Literatura por la UNACH. Doctorando del programa de Sociología de la UNED España. Larga trayectoria vinculado a las organizaciones sociales acompañando procesos de gestión cultural, formación de liderazgos y economía solidaria.</personal-details:content>
</person:biography>
<researcher-url:researcher-urls path="/0000-0001-5045-1000/researcher-urls"/>
<email:emails path="/0000-0001-5045-1000/email"/>
<address:addresses path="/0000-0001-5045-1000/address"/>
<keyword:keywords path="/0000-0001-5045-1000/keywords"/>
<external-identifier:external-identifiers path="/0000-0001-7291-3210/external-identifiers">
<common:last-modified-date>2018-02-05T23:27:36.636Z</common:last-modified-date>
<external-identifier:external-identifier put-code="134902" visibility="public" path="/0000-0001-7291-3210/external-identifiers/134902" display-index="1">
<common:created-date>2013-03-08T03:20:39.347Z</common:created-date>
<common:last-modified-date>2018-02-05T23:27:36.636Z</common:last-modified-date>
<common:source>
<common:source-client-id>
<common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
<common:path>0000-0002-5982-8983</common:path>
<common:host>orcid.org</common:host>
</common:source-client-id>
<common:source-name>Scopus - Elsevier</common:source-name>
<common:assertion-origin-orcid>
<common:uri>https://orcid.org/0000-0001-7291-3210</common:uri>
<common:path>0000-0001-7291-3210</common:path>
<common:host>orcid.org</common:host>
</common:assertion-origin-orcid>
<common:assertion-origin-name>Paolo Manghi</common:assertion-origin-name>
</common:source>
<common:external-id-type>Scopus Author ID</common:external-id-type>
<common:external-id-value>6602255248</common:external-id-value>
<common:external-id-url>http://www.scopus.com/inward/authorDetails.url?authorID=6602255248&amp;partnerID=MN8TOARS</common:external-id-url>
<common:external-id-relationship>self</common:external-id-relationship>
</external-identifier:external-identifier>
</external-identifier:external-identifiers>
</person:person>
<activities:activities-summary path="/0000-0001-5045-1000/activities">
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
<activities:distinctions path="/0000-0001-5045-1000/distinctions">
<common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
<activities:affiliation-group>
<common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
<common:external-ids/>
<distinction:distinction-summary put-code="19395146" display-index="1" path="/0000-0001-5045-1000/distinction/19395146" visibility="public">
<common:created-date>2023-01-19T13:49:48.482Z</common:created-date>
<common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Programa de Maestría</common:department-name>
<common:role-title>Becario del programa de Maestría en Economía Social y Solidaria</common:role-title>
<common:start-date>
<common:year>2014</common:year>
<common:month>10</common:month>
<common:day>20</common:day>
</common:start-date>
<common:organization>
<common:name>Instituto de Altos Estudios Nacionales</common:name>
<common:address>
<common:city>Quito</common:city>
<common:region>Pichincha</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>https://ror.org/011g3me54</common:disambiguated-organization-identifier>
<common:disambiguation-source>ROR</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</distinction:distinction-summary>
</activities:affiliation-group>
</activities:distinctions>
<activities:educations path="/0000-0001-5045-1000/educations">
<common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
<common:external-ids/>
<education:education-summary put-code="19389331" display-index="1" path="/0000-0001-5045-1000/education/19389331" visibility="public">
<common:created-date>2023-01-18T21:41:03.175Z</common:created-date>
<common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Programa de Doctorado en Sociología</common:department-name>
<common:role-title>Doctorando del Programa de Sociología</common:role-title>
<common:start-date>
<common:year>2020</common:year>
<common:month>11</common:month>
<common:day>06</common:day>
</common:start-date>
<common:organization>
<common:name>Universidad Nacional de Educación a Distancia Facultad de Ciencias Políticas y Sociología</common:name>
<common:address>
<common:city>Madrid</common:city>
<common:region>Comunidad de Madrid</common:region>
<common:country>ES</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>223339</common:disambiguated-organization-identifier>
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</education:education-summary>
</activities:affiliation-group>
</activities:educations>
<activities:employments path="/0000-0001-5045-1000/employments">
<common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:22:21.513Z</common:last-modified-date>
<common:external-ids/>
<employment:employment-summary put-code="19379757" display-index="1" path="/0000-0001-5045-1000/employment/19379757" visibility="public">
<common:created-date>2023-01-17T23:57:08.246Z</common:created-date>
<common:last-modified-date>2023-01-18T21:22:21.513Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Dirección de Vinculación con la Sociedad</common:department-name>
<common:role-title>Especialista de Proyectos y docente</common:role-title>
<common:start-date>
<common:year>2021</common:year>
<common:month>11</common:month>
<common:day>01</common:day>
</common:start-date>
<common:organization>
<common:name>Universidad de las Artes</common:name>
<common:address>
<common:city>Guayaquil</common:city>
<common:region>Guayas</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>https://ror.org/016drwn73</common:disambiguated-organization-identifier>
<common:disambiguation-source>ROR</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</employment:employment-summary>
</activities:affiliation-group>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
<common:external-ids/>
<employment:employment-summary put-code="19389234" display-index="1" path="/0000-0001-5045-1000/employment/19389234" visibility="public">
<common:created-date>2023-01-18T21:25:07.138Z</common:created-date>
<common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Dirección de Vinculación con la Sociedad</common:department-name>
<common:role-title>Director</common:role-title>
<common:start-date>
<common:year>2019</common:year>
<common:month>11</common:month>
<common:day>05</common:day>
</common:start-date>
<common:end-date>
<common:year>2021</common:year>
<common:month>10</common:month>
<common:day>31</common:day>
</common:end-date>
<common:organization>
<common:name>Universidad Regional Amazónica IKIAM</common:name>
<common:address>
<common:city>Tena</common:city>
<common:region>Napo</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>https://ror.org/05xedqd83</common:disambiguated-organization-identifier>
<common:disambiguation-source>ROR</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
<common:url>http://ikiam.edu.ec</common:url>
</employment:employment-summary>
</activities:affiliation-group>
</activities:employments>
<activities:fundings path="/0000-0001-5045-1000/fundings"/>
<activities:invited-positions path="/0000-0001-5045-1000/invited-positions"/>
<activities:memberships path="/0000-0001-5045-1000/memberships">
<common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
<activities:affiliation-group>
<common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
<common:external-ids/>
<membership:membership-summary put-code="19927715" display-index="1" path="/0000-0001-5045-1000/membership/19927715" visibility="public">
<common:created-date>2023-03-24T18:16:09.131Z</common:created-date>
<common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Artes Escénicas</common:department-name>
<common:role-title>Miembro</common:role-title>
<common:start-date>
<common:year>2000</common:year>
<common:month>07</common:month>
<common:day>15</common:day>
</common:start-date>
<common:organization>
<common:name>Casa de la Cultura Ecuatoriana</common:name>
<common:address>
<common:city>Riobamba</common:city>
<common:region>Sierra Centro</common:region>
<common:country>EC</common:country>
</common:address>
</common:organization>
</membership:membership-summary>
</activities:affiliation-group>
</activities:memberships>
<activities:peer-reviews path="/0000-0001-5045-1000/peer-reviews"/>
<activities:qualifications path="/0000-0001-5045-1000/qualifications">
<common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:29:11.300Z</common:last-modified-date>
<common:external-ids/>
<qualification:qualification-summary put-code="19389264" display-index="1" path="/0000-0001-5045-1000/qualification/19389264" visibility="public">
<common:created-date>2023-01-18T21:29:11.300Z</common:created-date>
<common:last-modified-date>2023-01-18T21:29:11.300Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Programa de Gobernabilidad</common:department-name>
<common:role-title>Magister en Economïa Social y Solidaria</common:role-title>
<common:start-date>
<common:year>2014</common:year>
<common:month>10</common:month>
<common:day>20</common:day>
</common:start-date>
<common:end-date>
<common:year>2017</common:year>
<common:month>01</common:month>
<common:day>26</common:day>
</common:end-date>
<common:organization>
<common:name>Instituto de Altos Estudios Nacionales</common:name>
<common:address>
<common:city>Quito</common:city>
<common:region>Pichincha</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>https://ror.org/011g3me54</common:disambiguated-organization-identifier>
<common:disambiguation-source>ROR</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</qualification:qualification-summary>
</activities:affiliation-group>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:34:32.093Z</common:last-modified-date>
<common:external-ids/>
<qualification:qualification-summary put-code="19389298" display-index="1" path="/0000-0001-5045-1000/qualification/19389298" visibility="public">
<common:created-date>2023-01-18T21:34:32.093Z</common:created-date>
<common:last-modified-date>2023-01-18T21:34:32.093Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Posgrados</common:department-name>
<common:role-title>Magister en Proyectos Sociales y Productivos</common:role-title>
<common:start-date>
<common:year>2001</common:year>
<common:month>03</common:month>
<common:day>09</common:day>
</common:start-date>
<common:end-date>
<common:year>2003</common:year>
<common:month>02</common:month>
<common:day>27</common:day>
</common:end-date>
<common:organization>
<common:name>Universidad Nacional de Chimborazo</common:name>
<common:address>
<common:city>Riobamba</common:city>
<common:region>Chimborazo</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>https://ror.org/059wmd288</common:disambiguated-organization-identifier>
<common:disambiguation-source>ROR</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</qualification:qualification-summary>
</activities:affiliation-group>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
<common:external-ids/>
<qualification:qualification-summary put-code="19389353" display-index="1" path="/0000-0001-5045-1000/qualification/19389353" visibility="public">
<common:created-date>2023-01-18T21:45:07.379Z</common:created-date>
<common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Ciencias de la Educación</common:department-name>
<common:role-title>Licenciado en Ciencias de la Educación en Castellano y Literatura</common:role-title>
<common:start-date>
<common:year>1994</common:year>
<common:month>10</common:month>
<common:day>03</common:day>
</common:start-date>
<common:end-date>
<common:year>2000</common:year>
<common:month>01</common:month>
<common:day>31</common:day>
</common:end-date>
<common:organization>
<common:name>Universidad Nacional de Chimborazo</common:name>
<common:address>
<common:city>Riobamba</common:city>
<common:region>Chimborazo</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>https://ror.org/059wmd288</common:disambiguated-organization-identifier>
<common:disambiguation-source>ROR</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</qualification:qualification-summary>
</activities:affiliation-group>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:37:42.186Z</common:last-modified-date>
<common:external-ids/>
<qualification:qualification-summary put-code="19389317" display-index="1" path="/0000-0001-5045-1000/qualification/19389317" visibility="public">
<common:created-date>2023-01-18T21:37:42.186Z</common:created-date>
<common:last-modified-date>2023-01-18T21:37:42.186Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Facultad de Artes</common:department-name>
<common:role-title>Licenciado en Artes</common:role-title>
<common:start-date>
<common:year>1989</common:year>
<common:month>09</common:month>
<common:day>05</common:day>
</common:start-date>
<common:end-date>
<common:year>1997</common:year>
<common:month>08</common:month>
<common:day>07</common:day>
</common:end-date>
<common:organization>
<common:name>Universidad Central del Ecuador</common:name>
<common:address>
<common:city>Quito</common:city>
<common:region>Pichincha</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>http://dx.doi.org/10.13039/100019134</common:disambiguated-organization-identifier>
<common:disambiguation-source>FUNDREF</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</qualification:qualification-summary>
</activities:affiliation-group>
</activities:qualifications>
<activities:research-resources path="/0000-0001-5045-1000/research-resources"/>
<activities:services path="/0000-0001-5045-1000/services"/>
<activities:works path="/0000-0001-5045-1000/works">
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
<activities:group>
<common:last-modified-date>2023-06-09T22:15:12.910Z</common:last-modified-date>
<common:external-ids/>
<work:work-summary put-code="131526645" path="/0000-0001-5045-1000/work/131526645" visibility="public" display-index="1">
<common:created-date>2023-03-24T18:36:56.180Z</common:created-date>
<common:last-modified-date>2023-06-09T22:15:12.910Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<work:title>
<common:title>Experience in a non-capitalist way: solidarity funds that do not tax interest on the use of money</common:title>
</work:title>
<common:external-ids>
<common:external-id>
<common:external-id-type>isbn</common:external-id-type>
<common:external-id-value>978-9942-29-089-2</common:external-id-value>
<common:external-id-normalized transient="true">9789942290892</common:external-id-normalized>
<common:external-id-relationship>part-of</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<work:type>book-chapter</work:type>
<common:publication-date>
<common:year>2023</common:year>
<common:month>06</common:month>
<common:day>07</common:day>
</common:publication-date>
<work:journal-title>Finanzas éticas y solidarias en América Latina: diagnósticos, debates y propuestas</work:journal-title>
</work:work-summary>
</activities:group>
<activities:group>
<common:last-modified-date>2023-03-24T19:05:36.384Z</common:last-modified-date>
<common:external-ids/>
<work:work-summary put-code="131527819" path="/0000-0001-5045-1000/work/131527819" visibility="public" display-index="1">
<common:created-date>2023-03-24T19:05:36.384Z</common:created-date>
<common:last-modified-date>2023-03-24T19:05:36.384Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<work:title>
<common:title>Incidence of artistic practices in the social transformation of the territory. study of case: Hilarte Association, Guayaquil-Ecuador</common:title>
</work:title>
<common:external-ids/>
<work:type>conference-abstract</work:type>
<common:publication-date>
<common:year>2022</common:year>
<common:month>10</common:month>
<common:day>06</common:day>
</common:publication-date>
</work:work-summary>
</activities:group>
<activities:group>
<common:last-modified-date>2023-09-04T17:40:30.215Z</common:last-modified-date>
<common:external-ids>
<common:external-id>
<common:external-id-type>other-id</common:external-id-type>
<common:external-id-value>2018</common:external-id-value>
<common:external-id-normalized transient="true">2018</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<work:work-summary put-code="141716337" path="/0000-0001-5045-1000/work/141716337" visibility="public" display-index="1">
<common:created-date>2023-09-04T17:40:30.215Z</common:created-date>
<common:last-modified-date>2023-09-04T17:40:30.215Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<work:title>
<common:title>Más allá de la transferencia de conocimientos, un espacio para el interaprendizaje y el diálogo de saberes</common:title>
</work:title>
<common:external-ids>
<common:external-id>
<common:external-id-type>other-id</common:external-id-type>
<common:external-id-value>2018</common:external-id-value>
<common:external-id-normalized transient="true">2018</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<common:url>https://drive.google.com/drive/folders/1Tclz6isxGzSjTq-hfTnxe6M1nux-88wF?usp=drive_link</common:url>
<work:type>conference-poster</work:type>
<common:publication-date>
<common:year>2018</common:year>
<common:month>11</common:month>
<common:day>30</common:day>
</common:publication-date>
<work:journal-title>Más allá de la transferencia de conocimientos, un espacio para el interaprendizaje y el diálogo de saberes</work:journal-title>
</work:work-summary>
</activities:group>
<activities:group>
<common:last-modified-date>2023-03-24T18:57:10.095Z</common:last-modified-date>
<common:external-ids/>
<work:work-summary put-code="131527433" path="/0000-0001-5045-1000/work/131527433" visibility="public" display-index="1">
<common:created-date>2023-03-24T18:57:10.095Z</common:created-date>
<common:last-modified-date>2023-03-24T18:57:10.095Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<work:title>
<common:title>Promotion of the popular and solidarity economy from the state: principles and challenges in the experience of Ecuador</common:title>
</work:title>
<common:external-ids/>
<work:type>dissertation-thesis</work:type>
<common:publication-date>
<common:year>2017</common:year>
<common:month>01</common:month>
<common:day>26</common:day>
</common:publication-date>
</work:work-summary>
</activities:group>
<activities:group>
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
<common:external-ids/>
<work:work-summary put-code="141716713" path="/0000-0001-5045-1000/work/141716713" visibility="public" display-index="1">
<common:created-date>2023-09-04T17:51:57.749Z</common:created-date>
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<work:title>
<common:title>La Rebelión de los Dioses</common:title>
</work:title>
<common:external-ids/>
<common:url>https://drive.google.com/drive/folders/1Tclz6isxGzSjTq-hfTnxe6M1nux-88wF?usp=drive_link</common:url>
<work:type>registered-copyright</work:type>
<common:publication-date>
<common:year>2001</common:year>
<common:month>08</common:month>
<common:day>28</common:day>
</common:publication-date>
<work:journal-title>Editorial pedagógica freire</work:journal-title>
</work:work-summary>
</activities:group>
</activities:works>
</activities:activities-summary>
</record:record>

View File

@ -133,32 +133,6 @@
<arg>--targetPath</arg><arg>${inputPathMAG}/dataset</arg>
<arg>--master</arg><arg>yarn-cluster</arg>
</spark>
<ok to="PreProcessORCID"/>
<error to="Kill"/>
</action>
<!-- ORCID SECTION -->
<action name="PreProcessORCID">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>Convert ORCID to Dataset</name>
<class>eu.dnetlib.doiboost.orcid.SparkPreprocessORCID</class>
<jar>dhp-doiboost-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.sql.shuffle.partitions=3840
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${inputPathOrcid}</arg>
<arg>--workingPath</arg><arg>${workingPathOrcid}</arg>
<arg>--master</arg><arg>yarn-cluster</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>

View File

@ -59,10 +59,10 @@
</property>
<!-- ORCID Parameters -->
<property>
<name>workingPathOrcid</name>
<description>the ORCID working path</description>
</property>
<!-- <property>-->
<!-- <name>workingPathOrcid</name>-->
<!-- <description>the ORCID working path</description>-->
<!-- </property>-->
</parameters>
@ -170,32 +170,6 @@
<arg>--targetPath</arg><arg>${workingPath}/uwPublication</arg>
<arg>--master</arg><arg>yarn-cluster</arg>
</spark>
<ok to="ProcessORCID"/>
<error to="Kill"/>
</action>
<!-- ORCID SECTION -->
<action name="ProcessORCID">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>Convert ORCID to Dataset</name>
<class>eu.dnetlib.doiboost.orcid.SparkConvertORCIDToOAF</class>
<jar>dhp-doiboost-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.sql.shuffle.partitions=3840
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--workingPath</arg><arg>${workingPathOrcid}</arg>
<arg>--targetPath</arg><arg>${workingPath}/orcidPublication</arg>
<arg>--master</arg><arg>yarn-cluster</arg>
</spark>
<ok to="CreateDOIBoost"/>
<error to="Kill"/>
</action>

View File

@ -66,7 +66,7 @@ object SparkGenerateDoiBoost {
Encoders.tuple(Encoders.STRING, mapEncoderPub)
implicit val mapEncoderRel: Encoder[Relation] = Encoders.kryo[Relation]
logger.info("Phase 2) Join Crossref with UnpayWall")
logger.info("Phase 1) Join Crossref with UnpayWall")
val crossrefPublication: Dataset[(String, Publication)] =
spark.read.load(s"$workingDirPath/crossrefPublication").as[Publication].map(p => (p.getId, p))
@ -91,20 +91,10 @@ object SparkGenerateDoiBoost {
.write
.mode(SaveMode.Overwrite)
.save(s"$workingDirPath/firstJoin")
logger.info("Phase 3) Join Result with ORCID")
val fj: Dataset[(String, Publication)] =
spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p))
val orcidPublication: Dataset[(String, Publication)] =
spark.read.load(s"$workingDirPath/orcidPublication").as[Publication].map(p => (p.getId, p))
fj.joinWith(orcidPublication, fj("_1").equalTo(orcidPublication("_1")), "left")
.map(applyMerge)
.write
.mode(SaveMode.Overwrite)
.save(s"$workingDirPath/secondJoin")
logger.info("Phase 4) Join Result with MAG")
logger.info("Phase 2) Join Result with MAG")
val sj: Dataset[(String, Publication)] =
spark.read.load(s"$workingDirPath/secondJoin").as[Publication].map(p => (p.getId, p))
spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p))
val magPublication: Dataset[(String, Publication)] =
spark.read.load(s"$workingDirPath/magPublication").as[Publication].map(p => (p.getId, p))

View File

@ -107,7 +107,7 @@ case object Crossref2Oaf {
.map(f => f.id)
}
def mappingResult(result: Result, json: JValue, cobjCategory: String, originalType:String): Result = {
def mappingResult(result: Result, json: JValue, cobjCategory: String, originalType: String): Result = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
//MAPPING Crossref DOI into PID
@ -372,7 +372,7 @@ case object Crossref2Oaf {
objectType,
mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type")
)
mappingResult(result, json, cOBJCategory, originalType)
mappingResult(result, json, cOBJCategory, objectSubType)
if (result == null || result.getId == null)
return List()

View File

@ -0,0 +1,26 @@
[
{
"paramName": "mt",
"paramLongName": "master",
"paramDescription": "should be local or yarn",
"paramRequired": true
},
{
"paramName": "op",
"paramLongName": "orcidPath",
"paramDescription": "the path of the orcid Table generated by the dump",
"paramRequired": true
},
{
"paramName": "gp",
"paramLongName": "graphPath",
"paramDescription": "the path of the graph we want to apply enrichment",
"paramRequired": true
},
{
"paramName": "tp",
"paramLongName": "targetPath",
"paramDescription": "the output path of the enriched graph",
"paramRequired": true
}
]

View File

@ -0,0 +1,34 @@
<!-- Oozie job properties given as name/value pairs. -->
<!-- NOTE(review): presumably the default configuration of the ORCID enrichment workflow; confirm against the file path. -->
<configuration>
<!-- Cluster endpoints: resource manager alias and HDFS name service. -->
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<!-- Use the Oozie system libpath and select the spark2 sharelib for spark actions. -->
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
<!-- NOTE(review): the Hive settings below point at a host named iis-cdh5-test-m3; confirm they are needed and correct for the target cluster. -->
<property>
<name>hiveMetastoreUris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>hiveJdbcUrl</name>
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
</property>
<property>
<name>hiveDbName</name>
<value>openaire</value>
</property>
<!-- Let classes shipped with the job take precedence on the launcher classpath. -->
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

View File

@ -0,0 +1,88 @@
<!--
    Enriches the result entities of a graph with ORCID author identifiers and then
    copies the remaining entity folders (datasource, organization, project, relation)
    from graphPath to targetPath.
-->
<workflow-app name="Enrich_graph_with_ORCID_Workflow" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>orcidPath</name>
            <description>the path of the orcid Table generated by the dump</description>
        </property>
        <property>
            <name>graphPath</name>
            <description>the path of the graph we want to apply enrichment</description>
        </property>
        <property>
            <name>targetPath</name>
            <description>the output path of the graph enriched</description>
        </property>
    </parameters>

    <start to="EnrichGraph"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <!-- Step 1: run the Spark job that writes the enriched result entities under targetPath. -->
    <action name="EnrichGraph">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Enrich Graph with ORCID</name>
            <class>eu.dnetlib.dhp.enrich.orcid.SparkEnrichGraphWithOrcidAuthors</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.executor.memoryOverhead=2g
                --conf spark.sql.shuffle.partitions=3000
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--orcidPath</arg>
            <arg>${orcidPath}</arg>
            <arg>--targetPath</arg>
            <arg>${targetPath}</arg>
            <arg>--graphPath</arg>
            <arg>${graphPath}</arg>
            <arg>--master</arg>
            <arg>yarn</arg>
        </spark>
        <!-- Continue with the copy chain; going straight to End would leave the copy actions unreachable. -->
        <ok to="copy_datasource"/>
        <error to="Kill"/>
    </action>

    <!-- Step 2: copy the entity folders that the Spark job does not write, one after the other. -->
    <action name="copy_datasource">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <arg>${nameNode}/${graphPath}/datasource</arg>
            <arg>${nameNode}/${targetPath}/datasource</arg>
        </distcp>
        <ok to="copy_organization"/>
        <error to="Kill"/>
    </action>

    <action name="copy_organization">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <arg>${nameNode}/${graphPath}/organization</arg>
            <arg>${nameNode}/${targetPath}/organization</arg>
        </distcp>
        <ok to="copy_project"/>
        <error to="Kill"/>
    </action>

    <action name="copy_project">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <arg>${nameNode}/${graphPath}/project</arg>
            <arg>${nameNode}/${targetPath}/project</arg>
        </distcp>
        <ok to="copy_relation"/>
        <error to="Kill"/>
    </action>

    <action name="copy_relation">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <arg>${nameNode}/${graphPath}/relation</arg>
            <arg>${nameNode}/${targetPath}/relation</arg>
        </distcp>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

    <end name="End"/>
</workflow-app>

View File

@ -0,0 +1,40 @@
package eu.dnetlib.dhp.enrich.orcid
import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.{Author, Publication}
import eu.dnetlib.dhp.schema.sx.OafUtils
import org.apache.spark.sql.Row
import scala.collection.JavaConverters._
/** Helpers that turn ORCID author rows into OAF [[Author]] instances. */
object AuthorEnricher extends Serializable {

  /** Builds an [[Author]] holding a single ORCID pid whose provenance action is
    * `ORCID_ENRICHMENT`.
    *
    * @param givenName  the author's given name, may be null
    * @param familyName the author's family name, may be null
    * @param orcid      the ORCID identifier stored as pid value
    * @return the newly created author
    */
  def createAuthor(givenName: String, familyName: String, orcid: String): Author = {
    val a = new Author
    a.setName(givenName)
    a.setSurname(familyName)
    // Join only the parts that are present: interpolating a null part would put
    // the literal text "null" into the full name (e.g. "John null").
    val fullname = Seq(givenName, familyName)
      .filter(part => part != null && part.nonEmpty)
      .mkString(" ")
    a.setFullname(fullname)
    val pid = OafUtils.createSP(orcid, ModelConstants.ORCID, ModelConstants.ORCID)
    pid.setDataInfo(OafUtils.generateDataInfo())
    pid.getDataInfo.setProvenanceaction(OafUtils.createQualifier("ORCID_ENRICHMENT", "ORCID_ENRICHMENT"))
    a.setPid(List(pid).asJava)
    a
  }

  /** Converts the author structs found in column 1 of the given row into OAF authors.
    *
    * @param r a row whose second column is a list of structs with the fields
    *          `givenName`, `familyName` and `orcid`
    * @return one [[Author]] per struct, in the same order
    */
  def toOAFAuthor(r: Row): java.util.List[Author] = {
    r.getList[Row](1)
      .asScala
      .map(s => createAuthor(s.getAs[String]("givenName"), s.getAs[String]("familyName"), s.getAs[String]("orcid")))
      .toList
      .asJava
  }

//  def enrichAuthor(p:Publication,r:Row): Unit = {
//    val k:Map[String, OAuthor] =r.getList[Row](1).asScala.map(s => (s.getAs[String]("orcid"), OAuthor(s.getAs[String]("givenName") ,s.getAs[String]("familyName") ))).groupBy(_._1).mapValues(_.map(_._2).head)
//    println(k)
//
//
//
//  }
}

View File

@ -0,0 +1,138 @@
package eu.dnetlib.dhp.enrich.orcid
import eu.dnetlib.dhp.application.AbstractScalaApplication
import eu.dnetlib.dhp.oa.merge.AuthorMerger
import eu.dnetlib.dhp.schema.common.ModelSupport
import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Result, Software}
import org.apache.spark.sql.functions._
import org.apache.spark.sql._
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.JavaConverters._
/** Spark application that enriches the authors of the result entities of a graph
  * (publication, dataset, software, otherresearchproduct) with the ORCID authors
  * whose works share a persistent identifier with the result.
  *
  * @param propertyPath classpath location of the JSON describing the accepted arguments
  * @param args         the command line arguments
  * @param log          the logger used by the application
  */
class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String], log: Logger)
    extends AbstractScalaApplication(propertyPath, args, log: Logger) {

  /** Here all the spark applications runs this method
    * where the whole logic of the spark node is defined
    */
  override def run(): Unit = {
    val graphPath = parser.get("graphPath")
    log.info(s"graphPath is '$graphPath'")
    val orcidPath = parser.get("orcidPath")
    log.info(s"orcidPath is '$orcidPath'")
    val targetPath = parser.get("targetPath")
    log.info(s"targetPath is '$targetPath'")
    val orcidPublication: Dataset[Row] = generateOrcidTable(spark, orcidPath)
    //    ModelSupport.entityTypes.entrySet().asScala.filter(k => k.getKey.getClass isInstance(Result))

    try {
      enrichResult(
        spark,
        s"$graphPath/publication",
        orcidPublication,
        s"$targetPath/publication",
        Encoders.bean(classOf[Publication])
      )
      enrichResult(
        spark,
        s"$graphPath/dataset",
        orcidPublication,
        s"$targetPath/dataset",
        Encoders.bean(classOf[eu.dnetlib.dhp.schema.oaf.Dataset])
      )
      enrichResult(
        spark,
        s"$graphPath/software",
        orcidPublication,
        s"$targetPath/software",
        Encoders.bean(classOf[Software])
      )
      enrichResult(
        spark,
        s"$graphPath/otherresearchproduct",
        orcidPublication,
        s"$targetPath/otherresearchproduct",
        Encoders.bean(classOf[OtherResearchProduct])
      )
    } finally {
      // generateOrcidTable hands back a cached dataset: release it once every
      // entity type has been processed (or as soon as one of them fails).
      orcidPublication.unpersist()
    }
  }

  /** Enriches the authors of one result type and writes the outcome as gzip-compressed JSON.
    *
    * The pids of the result instances are matched, ignoring case, against the ORCID
    * works table; the ORCID authors collected for each result id are then merged into
    * the result authors through `AuthorMerger.enrichOrcid`. Results without a match are
    * written unchanged.
    *
    * @param spark            the active spark session
    * @param graphPath        the path of the JSON-serialized entities to enrich
    * @param orcidPublication the table with the columns `schema`, `value` and `author`
    * @param outputPath       the path where the enriched entities are written (overwritten)
    * @param enc              the encoder of the entity type, also used as read schema
    */
  private def enrichResult[T <: Result](
    spark: SparkSession,
    graphPath: String,
    orcidPublication: Dataset[Row],
    outputPath: String,
    enc: Encoder[T]
  ): Unit = {

    // (pid_schema, pid_value, dnet_id) for every instance pid of the entities
    // that pass the deletedbyinference filter
    val entities = spark.read
      .schema(enc.schema)
      .json(graphPath)
      .select(col("id"), col("datainfo"), col("instance"))
      .where("datainfo.deletedbyinference != true")
      .drop("datainfo")
      .withColumn("instances", explode(col("instance")))
      .withColumn("pids", explode(col("instances.pid")))
      .select(
        col("pids.qualifier.classid").alias("pid_schema"),
        col("pids.value").alias("pid_value"),
        col("id").alias("dnet_id")
      )

    // the distinct ORCID authors associated with each entity id
    val orcidDnet = orcidPublication
      .join(
        entities,
        lower(col("schema")).equalTo(lower(col("pid_schema"))) &&
        lower(col("value")).equalTo(lower(col("pid_value"))),
        "inner"
      )
      .groupBy(col("dnet_id"))
      .agg(collect_set(orcidPublication("author")).alias("orcid_authors"))
      .select("dnet_id", "orcid_authors")
      .cache()

    try {
      // cache() is lazy: the count materializes the cached table before the join
      orcidDnet.count()
      val result = spark.read.schema(enc.schema).json(graphPath).as[T](enc)

      result
        .joinWith(orcidDnet, result("id").equalTo(orcidDnet("dnet_id")), "left")
        .map {
          case (r: T, null) =>
            r
          case (p: T, r: Row) =>
            p.setAuthor(AuthorMerger.enrichOrcid(p.getAuthor, AuthorEnricher.toOAFAuthor(r)))
            p
        }(enc)
        .write
        .mode(SaveMode.Overwrite)
        .option("compression", "gzip")
        .json(outputPath)
    } finally {
      // This method runs once per entity type: without releasing the cache here
      // every call would leave another cached table behind.
      orcidDnet.unpersist()
    }
  }

  /** Builds the ORCID lookup table by joining the `Authors` and `Works` tables
    * found under the given path.
    *
    * @param spark     the active spark session
    * @param inputPath the folder containing the `Authors` and `Works` tables
    * @return a cached dataset with the columns `schema`, `value` (the work identifier)
    *         and `author` (a struct with `orcid`, `givenName`, `familyName`)
    */
  private def generateOrcidTable(spark: SparkSession, inputPath: String): Dataset[Row] = {
    val orcidAuthors =
      spark.read.load(s"$inputPath/Authors").select("orcid", "familyName", "givenName", "creditName", "otherNames")
    // one row per work identifier, restricted to the listed schemas
    val orcidWorks = spark.read
      .load(s"$inputPath/Works")
      .select(col("orcid"), explode(col("pids")).alias("identifier"))
      .where(
        "identifier.schema IN('doi','pmid','pmc','arxiv','handle')"
      )
    val orcidPublication = orcidAuthors
      .join(orcidWorks, orcidAuthors("orcid").equalTo(orcidWorks("orcid")))
      .select(
        col("identifier.schema").alias("schema"),
        col("identifier.value").alias("value"),
        struct(orcidAuthors("orcid").alias("orcid"), col("givenName"), col("familyName")).alias("author")
      )
    orcidPublication.cache()
  }
}
/** Command line entry point of the ORCID graph enrichment application. */
object SparkEnrichGraphWithOrcidAuthors {

  val log: Logger = LoggerFactory.getLogger(SparkEnrichGraphWithOrcidAuthors.getClass)

  /** Creates the application from the given arguments, initializes it and runs it.
    *
    * @param args the command line arguments forwarded to the application
    */
  def main(args: Array[String]): Unit = {
    val application = new SparkEnrichGraphWithOrcidAuthors(
      "/eu/dnetlib/dhp/enrich/orcid/enrich_graph_orcid_parameters.json",
      args,
      log
    )
    application.initialize().run()
  }
}

View File

@ -0,0 +1,77 @@
package eu.dnetlib.dhp.enrich.orcid
import eu.dnetlib.dhp.schema.oaf.{Author, Publication}
import org.apache.spark.sql.{Column, Encoder, Encoders, Row, SparkSession}
import org.junit.jupiter.api.Test
import org.slf4j.{Logger, LoggerFactory}
import org.apache.spark.sql.functions._
// Scratch harness around a local Spark session; almost all of its body is commented out.
class EnrichOrcidTest {
val log: Logger = LoggerFactory.getLogger(getClass)
// NOTE(review): no @Test annotation is visible on this method, so JUnit will
// presumably not discover it; confirm whether that is intentional.
def test() = {
// Gets or creates a Spark session using the local[*] master.
val spark = SparkSession.builder().master("local[*]").getOrCreate()
// spark.sparkContext.setLogLevel("ERROR")
// new SparkEnrichGraphWithOrcidAuthors(null, null, null)
// .enrichResult(
// spark,
// "/Users/sandro/orcid_test/publication",
// "",
// "/tmp/graph/",
// Encoders.bean(classOf[Publication])
// )
// Schema derived from the Publication bean; only the commented-out code below refers to it.
val schema = Encoders.bean(classOf[Publication]).schema
//
// val simplifyAuthor = udf((r: Seq[Row]) => {
// r
// .map(k =>
// AuthorPid(
// k.getAs[String]("fullname"),
// k.getAs[Seq[Row]]("pid")
// .map(p => Pid(p.getAs[Row]("qualifier").getAs[String]("classid"), p.getAs[String]("value")))
// .toList
// )
// )
// .filter(l => l.pids.nonEmpty)
// .toList
// })
//
// val wrong_orcid_intersection = udf((a: Seq[Row]) => {
// a.map(author => {
// val pids_with_orcid: Seq[Row] = author
// .getAs[Seq[Row]]("pids")
// .filter(p =>
// p.getAs[String]("pidScheme") != null && p.getAs[String]("pidScheme").toLowerCase.contains("orcid")
// )
// if (pids_with_orcid.exists(p => p.getAs[String]("pidScheme").equals("ORCID"))) {
// if (pids_with_orcid.map(p => p.getAs[String]("pidValue").toLowerCase).distinct.size > 1) {
// AuthorPid(
// author.getAs[String]("fullName"),
// pids_with_orcid.map(p => Pid(p.getAs[String]("pidScheme"), p.getAs[String]("pidValue"))).toList
// )
//
// } else
// null
// } else
// null
// }).filter(author => author != null)
// })
// NOTE(review): the bare Encoders reference below is an expression whose value is
// discarded; it looks like a leftover and can probably be removed.
Encoders
import spark.implicits._
// val enriched = spark.read
// .schema(schema)
// .json("/Users/sandro/orcid_test/publication_enriched")
// .select(col("id"), explode(col("author")).as("authors"))
// .withColumn("ap", col("authors.pid.qualifier.classid"))
// .withColumn("dp", col("authors.pid.datainfo.provenanceAction.classid"))
//
// .show()
}
}