forked from D-Net/dnet-hadoop
Merge pull request 'ORCID Enrichment and Download' (#364) from orcid_import into beta
Reviewed-on: D-Net/dnet-hadoop#364
This commit is contained in:
commit
c5ac593c07
|
@ -4,16 +4,14 @@ package eu.dnetlib.dhp.oa.merge;
|
||||||
import java.text.Normalizer;
|
import java.text.Normalizer;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import com.wcohen.ss.JaroWinkler;
|
import com.wcohen.ss.JaroWinkler;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
import eu.dnetlib.pace.model.Person;
|
import eu.dnetlib.pace.model.Person;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
|
|
||||||
public class AuthorMerger {
|
public class AuthorMerger {
|
||||||
|
|
||||||
private static final Double THRESHOLD = 0.95;
|
private static final Double THRESHOLD = 0.95;
|
||||||
|
@ -119,6 +117,132 @@ public class AuthorMerger {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String normalizeFullName(final String fullname) {
|
||||||
|
return nfd(fullname)
|
||||||
|
.toLowerCase()
|
||||||
|
// do not compact the regexes in a single expression, would cause StackOverflowError
|
||||||
|
// in case
|
||||||
|
// of large input strings
|
||||||
|
.replaceAll("(\\W)+", " ")
|
||||||
|
.replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")
|
||||||
|
.replaceAll("(\\p{Punct})+", " ")
|
||||||
|
.replaceAll("(\\d)+", " ")
|
||||||
|
.replaceAll("(\\n)+", " ")
|
||||||
|
|
||||||
|
.trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static String authorFieldToBeCompared(Author author) {
|
||||||
|
if (StringUtils.isNotBlank(author.getSurname())) {
|
||||||
|
return author.getSurname();
|
||||||
|
|
||||||
|
}
|
||||||
|
if (StringUtils.isNotBlank(author.getFullname())) {
|
||||||
|
return author.getFullname();
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method tries to figure out when two author are the same in the contest
|
||||||
|
* of ORCID enrichment
|
||||||
|
*
|
||||||
|
* @param left Author in the OAF entity
|
||||||
|
* @param right Author ORCID
|
||||||
|
* @return based on a heuristic on the names of the authors if they are the same.
|
||||||
|
*/
|
||||||
|
public static boolean checkORCIDSimilarity(final Author left, final Author right) {
|
||||||
|
final Person pl = parse(left);
|
||||||
|
final Person pr = parse(right);
|
||||||
|
|
||||||
|
// If one of them didn't have a surname we verify if they have the fullName not empty
|
||||||
|
// and verify if the normalized version is equal
|
||||||
|
if (!(pl.getSurname() != null && pl.getSurname().stream().anyMatch(StringUtils::isNotBlank) &&
|
||||||
|
pr.getSurname() != null && pr.getSurname().stream().anyMatch(StringUtils::isNotBlank))) {
|
||||||
|
|
||||||
|
if (pl.getFullname() != null && !pl.getFullname().isEmpty() && pr.getFullname() != null
|
||||||
|
&& !pr.getFullname().isEmpty()) {
|
||||||
|
return pl
|
||||||
|
.getFullname()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(
|
||||||
|
fl -> pr.getFullname().stream().anyMatch(fr -> normalize(fl).equalsIgnoreCase(normalize(fr))));
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// The Authors have one surname in common
|
||||||
|
if (pl.getSurname().stream().anyMatch(sl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(sl)))) {
|
||||||
|
|
||||||
|
// If one of them has only a surname and is the same we can say that they are the same author
|
||||||
|
if ((pl.getName() == null || pl.getName().stream().allMatch(StringUtils::isBlank)) ||
|
||||||
|
(pr.getName() == null || pr.getName().stream().allMatch(StringUtils::isBlank)))
|
||||||
|
return true;
|
||||||
|
// The authors have the same initials of Name in common
|
||||||
|
if (pl
|
||||||
|
.getName()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(
|
||||||
|
nl -> pr
|
||||||
|
.getName()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(nr -> nr.equalsIgnoreCase(nl))))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sometimes we noticed that publication have author wrote in inverse order Surname, Name
|
||||||
|
// We verify if we have an exact match between name and surname
|
||||||
|
if (pl.getSurname().stream().anyMatch(sl -> pr.getName().stream().anyMatch(nr -> nr.equalsIgnoreCase(sl))) &&
|
||||||
|
pl.getName().stream().anyMatch(nl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(nl))))
|
||||||
|
return true;
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
//
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Method to enrich ORCID information in one list of authors based on another list
|
||||||
|
*
|
||||||
|
* @param baseAuthor the Author List in the OAF Entity
|
||||||
|
* @param orcidAuthor The list of ORCID Author intersected
|
||||||
|
* @return The Author List of the OAF Entity enriched with the orcid Author
|
||||||
|
*/
|
||||||
|
public static List<Author> enrichOrcid(List<Author> baseAuthor, List<Author> orcidAuthor) {
|
||||||
|
|
||||||
|
if (baseAuthor == null || baseAuthor.isEmpty())
|
||||||
|
return orcidAuthor;
|
||||||
|
|
||||||
|
if (orcidAuthor == null || orcidAuthor.isEmpty())
|
||||||
|
return baseAuthor;
|
||||||
|
|
||||||
|
if (baseAuthor.size() == 1 && orcidAuthor.size() > 10)
|
||||||
|
return baseAuthor;
|
||||||
|
|
||||||
|
final List<Author> oAuthor = new ArrayList<>();
|
||||||
|
oAuthor.addAll(orcidAuthor);
|
||||||
|
|
||||||
|
baseAuthor.forEach(ba -> {
|
||||||
|
Optional<Author> aMatch = oAuthor.stream().filter(oa -> checkORCIDSimilarity(ba, oa)).findFirst();
|
||||||
|
if (aMatch.isPresent()) {
|
||||||
|
final Author sameAuthor = aMatch.get();
|
||||||
|
addPid(ba, sameAuthor.getPid());
|
||||||
|
oAuthor.remove(sameAuthor);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
return baseAuthor;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void addPid(final Author a, final List<StructuredProperty> pids) {
|
||||||
|
|
||||||
|
if (a.getPid() == null) {
|
||||||
|
a.setPid(new ArrayList<>());
|
||||||
|
}
|
||||||
|
|
||||||
|
a.getPid().addAll(pids);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
public static String pidToComparableString(StructuredProperty pid) {
|
public static String pidToComparableString(StructuredProperty pid) {
|
||||||
final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase()
|
final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase()
|
||||||
: "";
|
: "";
|
||||||
|
@ -171,7 +295,7 @@ public class AuthorMerger {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String normalize(final String s) {
|
public static String normalize(final String s) {
|
||||||
String[] normalized = nfd(s)
|
String[] normalized = nfd(s)
|
||||||
.toLowerCase()
|
.toLowerCase()
|
||||||
// do not compact the regexes in a single expression, would cause StackOverflowError
|
// do not compact the regexes in a single expression, would cause StackOverflowError
|
||||||
|
|
|
@ -0,0 +1,114 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.oa.merge;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.junit.platform.commons.util.StringUtils;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.core.type.TypeReference;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.merge.AuthorMerger;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||||
|
|
||||||
|
public class AuthorMergerTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEnrcichAuthor() throws Exception {
|
||||||
|
final ObjectMapper mapper = new ObjectMapper();
|
||||||
|
|
||||||
|
BufferedReader pr = new BufferedReader(new InputStreamReader(
|
||||||
|
Objects
|
||||||
|
.requireNonNull(
|
||||||
|
AuthorMergerTest.class
|
||||||
|
.getResourceAsStream("/eu/dnetlib/dhp/oa/merge/authors_publication_sample.json"))));
|
||||||
|
BufferedReader or = new BufferedReader(new InputStreamReader(
|
||||||
|
Objects
|
||||||
|
.requireNonNull(
|
||||||
|
AuthorMergerTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/merge/authors_orcid_sample.json"))));
|
||||||
|
|
||||||
|
TypeReference<List<Author>> aclass = new TypeReference<List<Author>>() {
|
||||||
|
};
|
||||||
|
String pubLine;
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
while ((pubLine = pr.readLine()) != null) {
|
||||||
|
final String pubId = pubLine;
|
||||||
|
final String MatchPidOrcid = or.readLine();
|
||||||
|
final String pubOrcid = or.readLine();
|
||||||
|
|
||||||
|
final String data = pr.readLine();
|
||||||
|
|
||||||
|
if (StringUtils.isNotBlank(data)) {
|
||||||
|
List<Author> publicationAuthors = mapper.readValue(data, aclass);
|
||||||
|
List<Author> orcidAuthors = mapper.readValue(or.readLine(), aclass);
|
||||||
|
System.out.printf("OAF ID = %s \n", pubId);
|
||||||
|
System.out.printf("ORCID Intersected ID = %s \n", pubOrcid);
|
||||||
|
System.out.printf("OAF Author Size = %d \n", publicationAuthors.size());
|
||||||
|
System.out.printf("Oricd Author Size = %d \n", orcidAuthors.size());
|
||||||
|
System.out.printf("Oricd Matched PID = %s \n", MatchPidOrcid);
|
||||||
|
|
||||||
|
long originalAuthorWithPiD = publicationAuthors
|
||||||
|
.stream()
|
||||||
|
.filter(
|
||||||
|
a -> a.getPid() != null && a
|
||||||
|
.getPid()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(
|
||||||
|
p -> p.getQualifier() != null
|
||||||
|
&& p.getQualifier().getClassid().toLowerCase().contains("orcid")))
|
||||||
|
.count();
|
||||||
|
long start = System.currentTimeMillis();
|
||||||
|
|
||||||
|
// final List<Author> enrichedList = AuthorMerger.enrichOrcid(publicationAuthors, orcidAuthors);
|
||||||
|
final List<Author> enrichedList = AuthorMerger.enrichOrcid(publicationAuthors, orcidAuthors);
|
||||||
|
|
||||||
|
long enrichedAuthorWithPid = enrichedList
|
||||||
|
.stream()
|
||||||
|
.filter(
|
||||||
|
a -> a.getPid() != null && a
|
||||||
|
.getPid()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(
|
||||||
|
p -> p.getQualifier() != null
|
||||||
|
&& p.getQualifier().getClassid().toLowerCase().contains("orcid")))
|
||||||
|
.count();
|
||||||
|
|
||||||
|
long totalTime = (System.currentTimeMillis() - start) / 1000;
|
||||||
|
System.out
|
||||||
|
.printf(
|
||||||
|
"Enriched authors in %d seconds from %d pid to %d pid \n", totalTime, originalAuthorWithPiD,
|
||||||
|
enrichedAuthorWithPid);
|
||||||
|
|
||||||
|
System.out.println("=================");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void checkSimilarityTest() {
|
||||||
|
final Author left = new Author();
|
||||||
|
left.setName("Anand");
|
||||||
|
left.setSurname("Rachna");
|
||||||
|
left.setFullname("Anand, Rachna");
|
||||||
|
|
||||||
|
System.out.println(AuthorMerger.normalizeFullName(left.getFullname()));
|
||||||
|
|
||||||
|
final Author right = new Author();
|
||||||
|
right.setName("Rachna");
|
||||||
|
right.setSurname("Anand");
|
||||||
|
right.setFullname("Rachna, Anand");
|
||||||
|
// System.out.println(AuthorMerger.normalize(right.getFullname()));
|
||||||
|
boolean same = AuthorMerger.checkORCIDSimilarity(left, right);
|
||||||
|
|
||||||
|
assertTrue(same);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,102 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.collection.orcid;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration;
|
||||||
|
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.net.URLConnection;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.http.client.config.RequestConfig;
|
||||||
|
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||||
|
import org.apache.http.client.methods.HttpGet;
|
||||||
|
import org.apache.http.impl.client.CloseableHttpClient;
|
||||||
|
import org.apache.http.impl.client.HttpClientBuilder;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.JsonNode;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
|
||||||
|
public class DownloadORCIDDumpApplication {
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(DownloadORCIDDumpApplication.class);
|
||||||
|
|
||||||
|
private final FileSystem fileSystem;
|
||||||
|
|
||||||
|
public DownloadORCIDDumpApplication(FileSystem fileSystem) {
|
||||||
|
this.fileSystem = fileSystem;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(
|
||||||
|
IOUtils
|
||||||
|
.toString(
|
||||||
|
Objects
|
||||||
|
.requireNonNull(
|
||||||
|
DownloadORCIDDumpApplication.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/collection/orcid/download_orcid_parameter.json"))));
|
||||||
|
argumentParser.parseArgument(args);
|
||||||
|
|
||||||
|
final String hdfsuri = argumentParser.get("namenode");
|
||||||
|
log.info("hdfsURI is {}", hdfsuri);
|
||||||
|
|
||||||
|
final String targetPath = argumentParser.get("targetPath");
|
||||||
|
log.info("targetPath is {}", targetPath);
|
||||||
|
|
||||||
|
final String apiURL = argumentParser.get("apiURL");
|
||||||
|
log.info("apiURL is {}", apiURL);
|
||||||
|
|
||||||
|
final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(hdfsuri));
|
||||||
|
|
||||||
|
new DownloadORCIDDumpApplication(fileSystem).run(targetPath, apiURL);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private void downloadItem(final String name, final String itemURL, final String basePath) {
|
||||||
|
try {
|
||||||
|
final Path hdfsWritePath = new Path(String.format("%s/%s", basePath, name));
|
||||||
|
final FSDataOutputStream fsDataOutputStream = fileSystem.create(hdfsWritePath, true);
|
||||||
|
final HttpGet request = new HttpGet(itemURL);
|
||||||
|
final int timeout = 60; // seconds
|
||||||
|
final RequestConfig config = RequestConfig
|
||||||
|
.custom()
|
||||||
|
.setConnectTimeout(timeout * 1000)
|
||||||
|
.setConnectionRequestTimeout(timeout * 1000)
|
||||||
|
.setSocketTimeout(timeout * 1000)
|
||||||
|
.build();
|
||||||
|
log.info("Downloading url {} into {}", itemURL, hdfsWritePath.getName());
|
||||||
|
try (CloseableHttpClient client = HttpClientBuilder.create().setDefaultRequestConfig(config).build();
|
||||||
|
CloseableHttpResponse response = client.execute(request)) {
|
||||||
|
int responseCode = response.getStatusLine().getStatusCode();
|
||||||
|
log.info("Response code is {}", responseCode);
|
||||||
|
if (responseCode >= 200 && responseCode < 400) {
|
||||||
|
IOUtils.copy(response.getEntity().getContent(), fsDataOutputStream);
|
||||||
|
}
|
||||||
|
} catch (Throwable eu) {
|
||||||
|
throw new RuntimeException(eu);
|
||||||
|
}
|
||||||
|
} catch (Throwable e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void run(final String targetPath, final String apiURL) throws Exception {
|
||||||
|
final ObjectMapper mapper = new ObjectMapper();
|
||||||
|
final URL url = new URL(apiURL);
|
||||||
|
URLConnection conn = url.openConnection();
|
||||||
|
InputStream is = conn.getInputStream();
|
||||||
|
final String json = IOUtils.toString(is);
|
||||||
|
JsonNode jsonNode = mapper.readTree(json);
|
||||||
|
jsonNode
|
||||||
|
.get("files")
|
||||||
|
.forEach(i -> downloadItem(i.get("name").asText(), i.get("download_url").asText(), targetPath));
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,71 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.collection.orcid;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.LocatedFileStatus;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.RemoteIterator;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
|
||||||
|
public class ExtractORCIDDump {
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(ExtractORCIDDump.class);
|
||||||
|
|
||||||
|
private final FileSystem fileSystem;
|
||||||
|
|
||||||
|
public ExtractORCIDDump(FileSystem fileSystem) {
|
||||||
|
this.fileSystem = fileSystem;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(
|
||||||
|
IOUtils
|
||||||
|
.toString(
|
||||||
|
Objects
|
||||||
|
.requireNonNull(
|
||||||
|
DownloadORCIDDumpApplication.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/collection/orcid/extract_orcid_parameter.json"))));
|
||||||
|
argumentParser.parseArgument(args);
|
||||||
|
|
||||||
|
final String hdfsuri = argumentParser.get("namenode");
|
||||||
|
log.info("hdfsURI is {}", hdfsuri);
|
||||||
|
|
||||||
|
final String sourcePath = argumentParser.get("sourcePath");
|
||||||
|
log.info("sourcePath is {}", sourcePath);
|
||||||
|
|
||||||
|
final String targetPath = argumentParser.get("targetPath");
|
||||||
|
log.info("targetPath is {}", targetPath);
|
||||||
|
|
||||||
|
final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(hdfsuri));
|
||||||
|
|
||||||
|
new ExtractORCIDDump(fileSystem).run(sourcePath, targetPath);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public void run(final String sourcePath, final String targetPath) throws IOException, InterruptedException {
|
||||||
|
RemoteIterator<LocatedFileStatus> ls = fileSystem.listFiles(new Path(sourcePath), false);
|
||||||
|
final List<ORCIDExtractor> workers = new ArrayList<>();
|
||||||
|
int i = 0;
|
||||||
|
while (ls.hasNext()) {
|
||||||
|
LocatedFileStatus current = ls.next();
|
||||||
|
if (current.getPath().getName().endsWith("tar.gz")) {
|
||||||
|
workers.add(new ORCIDExtractor(fileSystem, "" + i++, current.getPath(), targetPath));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
workers.forEach(Thread::start);
|
||||||
|
for (ORCIDExtractor worker : workers) {
|
||||||
|
worker.join();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,171 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.collection.orcid;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
|
||||||
|
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.io.IOUtils;
|
||||||
|
import org.apache.hadoop.io.SequenceFile;
|
||||||
|
import org.apache.hadoop.io.Text;
|
||||||
|
import org.apache.hadoop.io.compress.CompressionCodec;
|
||||||
|
import org.apache.hadoop.io.compress.CompressionCodecFactory;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
/**\
|
||||||
|
* The ORCIDExtractor class extracts ORCID data from a TAR archive.
|
||||||
|
* The class creates a map of SequenceFile.Writer objects, one for each type of data that is to be extracted (e.g., employments, works, summaries).
|
||||||
|
* Then, it iterates over the TAR archive and writes each entry to the appropriate SequenceFile.Writer object.
|
||||||
|
* Finally, it closes all the SequenceFile.Writer objects.
|
||||||
|
*/
|
||||||
|
public class ORCIDExtractor extends Thread {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(ORCIDExtractor.class);
|
||||||
|
|
||||||
|
private final FileSystem fileSystem;
|
||||||
|
|
||||||
|
private final String id;
|
||||||
|
|
||||||
|
private final Path sourcePath;
|
||||||
|
|
||||||
|
private final String baseOutputPath;
|
||||||
|
|
||||||
|
public ORCIDExtractor(FileSystem fileSystem, String id, Path sourcePath, String baseOutputPath) {
|
||||||
|
this.fileSystem = fileSystem;
|
||||||
|
this.id = id;
|
||||||
|
this.sourcePath = sourcePath;
|
||||||
|
this.baseOutputPath = baseOutputPath;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* creates a map of SequenceFile.Writer objects,
|
||||||
|
* one for each type of data that is to be extracted. The map is created based on the filename in the TAR archive.
|
||||||
|
* For example, if the filename is employments.json, the map will contain an entry for the SequenceFile.Writer
|
||||||
|
* object that writes employment data.
|
||||||
|
* @return the Map
|
||||||
|
*/
|
||||||
|
private Map<String, SequenceFile.Writer> createMap() {
|
||||||
|
try {
|
||||||
|
log.info("Thread {} Creating sequence files starting from this input Path {}", id, sourcePath.getName());
|
||||||
|
Map<String, SequenceFile.Writer> res = new HashMap<>();
|
||||||
|
if (sourcePath.getName().contains("summaries")) {
|
||||||
|
|
||||||
|
final String summaryPath = String.format("%s/summaries_%s", baseOutputPath, id);
|
||||||
|
final SequenceFile.Writer summary_file = SequenceFile
|
||||||
|
.createWriter(
|
||||||
|
fileSystem.getConf(),
|
||||||
|
SequenceFile.Writer.file(new Path(summaryPath)),
|
||||||
|
SequenceFile.Writer.keyClass(Text.class),
|
||||||
|
SequenceFile.Writer.valueClass(Text.class));
|
||||||
|
|
||||||
|
log.info("Thread {} Creating only summary path here {}", id, summaryPath);
|
||||||
|
res.put("summary", summary_file);
|
||||||
|
return res;
|
||||||
|
} else {
|
||||||
|
String employmentsPath = String.format("%s/employments_%s", baseOutputPath, id);
|
||||||
|
final SequenceFile.Writer employments_file = SequenceFile
|
||||||
|
.createWriter(
|
||||||
|
fileSystem.getConf(),
|
||||||
|
SequenceFile.Writer.file(new Path(employmentsPath)),
|
||||||
|
SequenceFile.Writer.keyClass(Text.class),
|
||||||
|
SequenceFile.Writer.valueClass(Text.class));
|
||||||
|
res.put("employments", employments_file);
|
||||||
|
log.info("Thread {} Creating employments path here {}", id, employmentsPath);
|
||||||
|
|
||||||
|
final String worksPath = String.format("%s/works_%s", baseOutputPath, id);
|
||||||
|
final SequenceFile.Writer works_file = SequenceFile
|
||||||
|
.createWriter(
|
||||||
|
fileSystem.getConf(),
|
||||||
|
SequenceFile.Writer.file(new Path(worksPath)),
|
||||||
|
SequenceFile.Writer.keyClass(Text.class),
|
||||||
|
SequenceFile.Writer.valueClass(Text.class));
|
||||||
|
res.put("works", works_file);
|
||||||
|
log.info("Thread {} Creating works path here {}", id, worksPath);
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
} catch (Throwable e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
|
||||||
|
CompressionCodecFactory factory = new CompressionCodecFactory(fileSystem.getConf());
|
||||||
|
CompressionCodec codec = factory.getCodec(sourcePath);
|
||||||
|
if (codec == null) {
|
||||||
|
System.err.println("No codec found for " + sourcePath.getName());
|
||||||
|
System.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
InputStream gzipInputStream = null;
|
||||||
|
try {
|
||||||
|
gzipInputStream = codec.createInputStream(fileSystem.open(sourcePath));
|
||||||
|
final Map<String, SequenceFile.Writer> fileMap = createMap();
|
||||||
|
iterateTar(fileMap, gzipInputStream);
|
||||||
|
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
} finally {
|
||||||
|
log.info("Closing gzip stream");
|
||||||
|
IOUtils.closeStream(gzipInputStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private SequenceFile.Writer retrieveFile(Map<String, SequenceFile.Writer> fileMap, final String path) {
|
||||||
|
if (sourcePath.getName().contains("summaries")) {
|
||||||
|
return fileMap.get("summary");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (path.contains("works")) {
|
||||||
|
return fileMap.get("works");
|
||||||
|
}
|
||||||
|
if (path.contains("employments"))
|
||||||
|
return fileMap.get("employments");
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void iterateTar(Map<String, SequenceFile.Writer> fileMap, InputStream gzipInputStream) throws IOException {
|
||||||
|
|
||||||
|
int extractedItem = 0;
|
||||||
|
try (final TarArchiveInputStream tais = new TarArchiveInputStream(gzipInputStream)) {
|
||||||
|
|
||||||
|
TarArchiveEntry entry;
|
||||||
|
while ((entry = tais.getNextTarEntry()) != null) {
|
||||||
|
|
||||||
|
if (entry.isFile()) {
|
||||||
|
|
||||||
|
final SequenceFile.Writer fl = retrieveFile(fileMap, entry.getName());
|
||||||
|
if (fl != null) {
|
||||||
|
final Text key = new Text(entry.getName());
|
||||||
|
final Text value = new Text(
|
||||||
|
org.apache.commons.io.IOUtils.toString(new BufferedReader(new InputStreamReader(tais))));
|
||||||
|
fl.append(key, value);
|
||||||
|
extractedItem++;
|
||||||
|
if (extractedItem % 100000 == 0) {
|
||||||
|
log.info("Thread {}: Extracted {} items", id, extractedItem);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
for (SequenceFile.Writer k : fileMap.values()) {
|
||||||
|
log.info("Thread {}: Completed processed {} items", id, extractedItem);
|
||||||
|
k.hflush();
|
||||||
|
k.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,251 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.collection.orcid;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.ximpleware.*;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.collection.orcid.model.*;
|
||||||
|
import eu.dnetlib.dhp.parser.utility.VtdException;
|
||||||
|
import eu.dnetlib.dhp.parser.utility.VtdUtilityParser;
|
||||||
|
|
||||||
|
public class OrcidParser {
|
||||||
|
|
||||||
|
final Logger log = LoggerFactory.getLogger(OrcidParser.class);
|
||||||
|
private VTDNav vn;
|
||||||
|
|
||||||
|
private AutoPilot ap;
|
||||||
|
private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common";
|
||||||
|
private static final String NS_COMMON = "common";
|
||||||
|
private static final String NS_PERSON_URL = "http://www.orcid.org/ns/person";
|
||||||
|
private static final String NS_PERSON = "person";
|
||||||
|
private static final String NS_DETAILS_URL = "http://www.orcid.org/ns/personal-details";
|
||||||
|
private static final String NS_DETAILS = "personal-details";
|
||||||
|
private static final String NS_OTHER_URL = "http://www.orcid.org/ns/other-name";
|
||||||
|
private static final String NS_OTHER = "other-name";
|
||||||
|
private static final String NS_RECORD_URL = "http://www.orcid.org/ns/record";
|
||||||
|
private static final String NS_RECORD = "record";
|
||||||
|
private static final String NS_ERROR_URL = "http://www.orcid.org/ns/error";
|
||||||
|
private static final String NS_ACTIVITIES = "activities";
|
||||||
|
private static final String NS_ACTIVITIES_URL = "http://www.orcid.org/ns/activities";
|
||||||
|
private static final String NS_WORK = "work";
|
||||||
|
private static final String NS_WORK_URL = "http://www.orcid.org/ns/work";
|
||||||
|
|
||||||
|
private static final String NS_ERROR = "error";
|
||||||
|
private static final String NS_HISTORY = "history";
|
||||||
|
private static final String NS_HISTORY_URL = "http://www.orcid.org/ns/history";
|
||||||
|
private static final String NS_BULK_URL = "http://www.orcid.org/ns/bulk";
|
||||||
|
private static final String NS_BULK = "bulk";
|
||||||
|
private static final String NS_EXTERNAL = "external-identifier";
|
||||||
|
private static final String NS_EXTERNAL_URL = "http://www.orcid.org/ns/external-identifier";
|
||||||
|
|
||||||
|
private void generateParsedDocument(final String xml) throws ParseException {
|
||||||
|
final VTDGen vg = new VTDGen();
|
||||||
|
vg.setDoc(xml.getBytes());
|
||||||
|
vg.parse(true);
|
||||||
|
this.vn = vg.getNav();
|
||||||
|
this.ap = new AutoPilot(vn);
|
||||||
|
ap.declareXPathNameSpace(NS_COMMON, NS_COMMON_URL);
|
||||||
|
ap.declareXPathNameSpace(NS_PERSON, NS_PERSON_URL);
|
||||||
|
ap.declareXPathNameSpace(NS_DETAILS, NS_DETAILS_URL);
|
||||||
|
ap.declareXPathNameSpace(NS_OTHER, NS_OTHER_URL);
|
||||||
|
ap.declareXPathNameSpace(NS_RECORD, NS_RECORD_URL);
|
||||||
|
ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL);
|
||||||
|
ap.declareXPathNameSpace(NS_HISTORY, NS_HISTORY_URL);
|
||||||
|
ap.declareXPathNameSpace(NS_WORK, NS_WORK_URL);
|
||||||
|
ap.declareXPathNameSpace(NS_EXTERNAL, NS_EXTERNAL_URL);
|
||||||
|
ap.declareXPathNameSpace(NS_ACTIVITIES, NS_ACTIVITIES_URL);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Author parseSummary(final String xml) {
|
||||||
|
|
||||||
|
try {
|
||||||
|
final Author author = new Author();
|
||||||
|
generateParsedDocument(xml);
|
||||||
|
List<VtdUtilityParser.Node> recordNodes = VtdUtilityParser
|
||||||
|
.getTextValuesWithAttributes(
|
||||||
|
ap, vn, "//record:record", Arrays.asList("path"));
|
||||||
|
if (!recordNodes.isEmpty()) {
|
||||||
|
final String oid = (recordNodes.get(0).getAttributes().get("path")).substring(1);
|
||||||
|
author.setOrcid(oid);
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
List<VtdUtilityParser.Node> personNodes = VtdUtilityParser
|
||||||
|
.getTextValuesWithAttributes(
|
||||||
|
ap, vn, "//person:name", Arrays.asList("visibility"));
|
||||||
|
final String visibility = (personNodes.get(0).getAttributes().get("visibility"));
|
||||||
|
author.setVisibility(visibility);
|
||||||
|
final String name = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:given-names");
|
||||||
|
author.setGivenName(name);
|
||||||
|
|
||||||
|
final String surnames = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:family-name");
|
||||||
|
author.setFamilyName(surnames);
|
||||||
|
|
||||||
|
final String creditNames = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:credit-name");
|
||||||
|
author.setCreditName(creditNames);
|
||||||
|
|
||||||
|
final String biography = VtdUtilityParser
|
||||||
|
.getSingleValue(ap, vn, "//person:biography/personal-details:content");
|
||||||
|
author.setBiography(biography);
|
||||||
|
|
||||||
|
final List<String> otherNames = VtdUtilityParser.getTextValue(ap, vn, "//other-name:content");
|
||||||
|
if (!otherNames.isEmpty()) {
|
||||||
|
author.setOtherNames(otherNames);
|
||||||
|
}
|
||||||
|
|
||||||
|
ap.selectXPath("//external-identifier:external-identifier");
|
||||||
|
|
||||||
|
while (ap.evalXPath() != -1) {
|
||||||
|
final Pid pid = new Pid();
|
||||||
|
|
||||||
|
final AutoPilot ap1 = new AutoPilot(ap.getNav());
|
||||||
|
|
||||||
|
ap1.selectXPath("./common:external-id-type");
|
||||||
|
while (ap1.evalXPath() != -1) {
|
||||||
|
int it = vn.getText();
|
||||||
|
pid.setSchema(vn.toNormalizedString(it));
|
||||||
|
}
|
||||||
|
ap1.selectXPath("./common:external-id-value");
|
||||||
|
while (ap1.evalXPath() != -1) {
|
||||||
|
int it = vn.getText();
|
||||||
|
pid.setValue(vn.toNormalizedString(it));
|
||||||
|
}
|
||||||
|
|
||||||
|
author.addOtherPid(pid);
|
||||||
|
}
|
||||||
|
|
||||||
|
return author;
|
||||||
|
} catch (Throwable e) {
|
||||||
|
log.error("Error on parsing {}", xml);
|
||||||
|
log.error(e.getMessage());
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Work parseWork(final String xml) {
|
||||||
|
|
||||||
|
try {
|
||||||
|
final Work work = new Work();
|
||||||
|
generateParsedDocument(xml);
|
||||||
|
List<VtdUtilityParser.Node> workNodes = VtdUtilityParser
|
||||||
|
.getTextValuesWithAttributes(ap, vn, "//work:work", Arrays.asList("path", "visibility"));
|
||||||
|
if (!workNodes.isEmpty()) {
|
||||||
|
final String oid = (workNodes.get(0).getAttributes().get("path")).split("/")[1];
|
||||||
|
work.setOrcid(oid);
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
ap.selectXPath("//common:external-id");
|
||||||
|
|
||||||
|
while (ap.evalXPath() != -1) {
|
||||||
|
final Pid pid = new Pid();
|
||||||
|
|
||||||
|
final AutoPilot ap1 = new AutoPilot(ap.getNav());
|
||||||
|
|
||||||
|
ap1.selectXPath("./common:external-id-type");
|
||||||
|
while (ap1.evalXPath() != -1) {
|
||||||
|
int it = vn.getText();
|
||||||
|
pid.setSchema(vn.toNormalizedString(it));
|
||||||
|
}
|
||||||
|
ap1.selectXPath("./common:external-id-value");
|
||||||
|
while (ap1.evalXPath() != -1) {
|
||||||
|
int it = vn.getText();
|
||||||
|
pid.setValue(vn.toNormalizedString(it));
|
||||||
|
}
|
||||||
|
|
||||||
|
work.addPid(pid);
|
||||||
|
}
|
||||||
|
|
||||||
|
work.setTitle(VtdUtilityParser.getSingleValue(ap, vn, "//work:title/common:title"));
|
||||||
|
|
||||||
|
return work;
|
||||||
|
} catch (Throwable e) {
|
||||||
|
log.error("Error on parsing {}", xml);
|
||||||
|
log.error(e.getMessage());
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private String extractEmploymentDate(final String xpath) throws Exception {
|
||||||
|
|
||||||
|
ap.selectXPath(xpath);
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
while (ap.evalXPath() != -1) {
|
||||||
|
final AutoPilot ap1 = new AutoPilot(ap.getNav());
|
||||||
|
ap1.selectXPath("./common:year");
|
||||||
|
while (ap1.evalXPath() != -1) {
|
||||||
|
int it = vn.getText();
|
||||||
|
sb.append(vn.toNormalizedString(it));
|
||||||
|
}
|
||||||
|
ap1.selectXPath("./common:month");
|
||||||
|
while (ap1.evalXPath() != -1) {
|
||||||
|
int it = vn.getText();
|
||||||
|
sb.append("-");
|
||||||
|
sb.append(vn.toNormalizedString(it));
|
||||||
|
}
|
||||||
|
ap1.selectXPath("./common:day");
|
||||||
|
while (ap1.evalXPath() != -1) {
|
||||||
|
int it = vn.getText();
|
||||||
|
sb.append("-");
|
||||||
|
sb.append(vn.toNormalizedString(it));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return sb.toString();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public Employment parseEmployment(final String xml) {
|
||||||
|
try {
|
||||||
|
final Employment employment = new Employment();
|
||||||
|
generateParsedDocument(xml);
|
||||||
|
final String oid = VtdUtilityParser
|
||||||
|
.getSingleValue(ap, vn, "//common:source-orcid/common:path");
|
||||||
|
if (StringUtils.isNotBlank(oid)) {
|
||||||
|
employment.setOrcid(oid);
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
final String depName = VtdUtilityParser
|
||||||
|
.getSingleValue(ap, vn, "//common:department-name");
|
||||||
|
final String rolTitle = VtdUtilityParser
|
||||||
|
.getSingleValue(ap, vn, "//common:role-title");
|
||||||
|
if (StringUtils.isNotBlank(rolTitle))
|
||||||
|
employment.setRoleTitle(rolTitle);
|
||||||
|
if (StringUtils.isNotBlank(depName))
|
||||||
|
employment.setDepartmentName(depName);
|
||||||
|
else
|
||||||
|
employment
|
||||||
|
.setDepartmentName(
|
||||||
|
VtdUtilityParser
|
||||||
|
.getSingleValue(ap, vn, "//common:organization/common:name"));
|
||||||
|
|
||||||
|
employment.setStartDate(extractEmploymentDate("//common:start-date"));
|
||||||
|
employment.setEndDate(extractEmploymentDate("//common:end-date"));
|
||||||
|
|
||||||
|
final String affiliationId = VtdUtilityParser
|
||||||
|
.getSingleValue(ap, vn, "//common:disambiguated-organization-identifier");
|
||||||
|
final String affiliationIdType = VtdUtilityParser
|
||||||
|
.getSingleValue(ap, vn, "//common:disambiguation-source");
|
||||||
|
|
||||||
|
if (StringUtils.isNotBlank(affiliationId) || StringUtils.isNotBlank(affiliationIdType))
|
||||||
|
employment.setAffiliationId(new Pid(affiliationId, affiliationIdType));
|
||||||
|
|
||||||
|
return employment;
|
||||||
|
} catch (Throwable e) {
|
||||||
|
log.error("Error on parsing {}", xml);
|
||||||
|
log.error(e.getMessage());
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,83 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.collection.orcid.model;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class Author extends ORCIDItem {
|
||||||
|
private String givenName;
|
||||||
|
private String familyName;
|
||||||
|
|
||||||
|
private String visibility;
|
||||||
|
|
||||||
|
private String creditName;
|
||||||
|
|
||||||
|
private List<String> otherNames;
|
||||||
|
|
||||||
|
private List<Pid> otherPids;
|
||||||
|
|
||||||
|
private String biography;
|
||||||
|
|
||||||
|
public String getBiography() {
|
||||||
|
return biography;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setBiography(String biography) {
|
||||||
|
this.biography = biography;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getGivenName() {
|
||||||
|
return givenName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setGivenName(String givenName) {
|
||||||
|
this.givenName = givenName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getFamilyName() {
|
||||||
|
return familyName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFamilyName(String familyName) {
|
||||||
|
this.familyName = familyName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getCreditName() {
|
||||||
|
return creditName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCreditName(String creditName) {
|
||||||
|
this.creditName = creditName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getOtherNames() {
|
||||||
|
return otherNames;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setOtherNames(List<String> otherNames) {
|
||||||
|
this.otherNames = otherNames;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getVisibility() {
|
||||||
|
return visibility;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setVisibility(String visibility) {
|
||||||
|
this.visibility = visibility;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Pid> getOtherPids() {
|
||||||
|
return otherPids;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setOtherPids(List<Pid> otherPids) {
|
||||||
|
this.otherPids = otherPids;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addOtherPid(final Pid pid) {
|
||||||
|
|
||||||
|
if (otherPids == null)
|
||||||
|
otherPids = new ArrayList<>();
|
||||||
|
otherPids.add(pid);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,54 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.collection.orcid.model;
|
||||||
|
|
||||||
|
public class Employment extends ORCIDItem {
|
||||||
|
|
||||||
|
private String startDate;
|
||||||
|
private String EndDate;
|
||||||
|
|
||||||
|
private Pid affiliationId;
|
||||||
|
|
||||||
|
private String departmentName;
|
||||||
|
|
||||||
|
private String roleTitle;
|
||||||
|
|
||||||
|
public String getStartDate() {
|
||||||
|
return startDate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setStartDate(String startDate) {
|
||||||
|
this.startDate = startDate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getEndDate() {
|
||||||
|
return EndDate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setEndDate(String endDate) {
|
||||||
|
EndDate = endDate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Pid getAffiliationId() {
|
||||||
|
return affiliationId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setAffiliationId(Pid affiliationId) {
|
||||||
|
this.affiliationId = affiliationId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getDepartmentName() {
|
||||||
|
return departmentName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDepartmentName(String departmentName) {
|
||||||
|
this.departmentName = departmentName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getRoleTitle() {
|
||||||
|
return roleTitle;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRoleTitle(String roleTitle) {
|
||||||
|
this.roleTitle = roleTitle;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,14 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.collection.orcid.model;
|
||||||
|
|
||||||
|
/**
 * Base bean of the ORCID model classes: it only carries the ORCID iD the item refers to.
 */
public class ORCIDItem {

	/** The ORCID iD; {@code null} until {@link #setOrcid(String)} is called. */
	private String orcid;

	/**
	 * @return the ORCID iD, possibly {@code null}
	 */
	public String getOrcid() {
		return this.orcid;
	}

	/**
	 * @param orcid the ORCID iD to store
	 */
	public void setOrcid(String orcid) {
		this.orcid = orcid;
	}
}
|
|
@ -0,0 +1,33 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.collection.orcid.model;
|
||||||
|
|
||||||
|
/**
 * Bean holding a persistent identifier as a (value, schema) pair.
 */
public class Pid {

	private String value;
	private String schema;

	/** Creates an identifier with both value and schema left {@code null}. */
	public Pid() {
		this(null, null);
	}

	/**
	 * @param value  the identifier value
	 * @param schema the schema (type) of the identifier
	 */
	public Pid(String value, String schema) {
		this.value = value;
		this.schema = schema;
	}

	public String getValue() {
		return this.value;
	}

	public void setValue(String value) {
		this.value = value;
	}

	public String getSchema() {
		return this.schema;
	}

	public void setSchema(String schema) {
		this.schema = schema;
	}
}
|
|
@ -0,0 +1,35 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.collection.orcid.model;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class Work extends ORCIDItem {
|
||||||
|
|
||||||
|
private String title;
|
||||||
|
|
||||||
|
private List<Pid> pids;
|
||||||
|
|
||||||
|
public String getTitle() {
|
||||||
|
return title;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTitle(String title) {
|
||||||
|
this.title = title;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Pid> getPids() {
|
||||||
|
return pids;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPids(List<Pid> pids) {
|
||||||
|
this.pids = pids;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addPid(Pid pid) {
|
||||||
|
if (pids == null)
|
||||||
|
pids = new ArrayList<>();
|
||||||
|
pids.add(pid);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,21 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"paramName": "n",
|
||||||
|
"paramLongName": "namenode",
|
||||||
|
"paramDescription": "the Name Node URI",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "t",
|
||||||
|
"paramLongName": "targetPath",
|
||||||
|
"paramDescription": "the target PATH where the files are downloaded",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "a",
|
||||||
|
"paramLongName": "apiURL",
|
||||||
|
"paramDescription": "the FIGSHARE API id URL to retrieve all the dump files",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
|
||||||
|
]
|
|
@ -0,0 +1,21 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"paramName": "n",
|
||||||
|
"paramLongName": "namenode",
|
||||||
|
"paramDescription": "the Name Node URI",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "t",
|
||||||
|
"paramLongName": "targetPath",
|
||||||
|
"paramDescription": "the target PATH to extract files",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "s",
|
||||||
|
"paramLongName": "sourcePath",
|
||||||
|
"paramDescription": "the PATH where the tar.gz files were downloaded",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
|
||||||
|
]
|
|
@ -0,0 +1,21 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"paramName": "m",
|
||||||
|
"paramLongName": "master",
|
||||||
|
"paramDescription": "the master name",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "t",
|
||||||
|
"paramLongName": "targetPath",
|
||||||
|
"paramDescription": "the target PATH of the DF tables",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "s",
|
||||||
|
"paramLongName": "sourcePath",
|
||||||
|
"paramDescription": "the PATH of the ORCID sequence file",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
|
||||||
|
]
|
|
@ -0,0 +1,23 @@
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>jobTracker</name>
|
||||||
|
<value>yarnRM</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>nameNode</name>
|
||||||
|
<value>hdfs://nameservice1</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.use.system.libpath</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.action.sharelib.for.spark</name>
|
||||||
|
<value>spark2</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
|
@ -0,0 +1,81 @@
|
||||||
|
<workflow-app name="download_ORCID_DUMP" xmlns="uri:oozie:workflow:0.5">
|
||||||
|
<parameters>
|
||||||
|
<property>
|
||||||
|
<name>targetPath</name>
|
||||||
|
<description>the path to store the original ORCID dump</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>apiURL</name>
|
||||||
|
<description>The figshare API URL to retrieve the list of files to download</description>
|
||||||
|
</property>
|
||||||
|
</parameters>
|
||||||
|
|
||||||
|
<!-- start from the first action so that the whole chain runs: DownloadDUMP -> extractDump -> generateTables -->
<start to="DownloadDUMP"/>
|
||||||
|
|
||||||
|
<kill name="Kill">
|
||||||
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
|
</kill>
|
||||||
|
|
||||||
|
<action name="DownloadDUMP">
|
||||||
|
<java>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
|
<main-class>eu.dnetlib.dhp.collection.orcid.DownloadORCIDDumpApplication</main-class>
|
||||||
|
<arg>--namenode</arg><arg>${nameNode}</arg>
|
||||||
|
<arg>--targetPath</arg><arg>${targetPath}</arg>
|
||||||
|
<arg>--apiURL</arg><arg>${apiURL}</arg>
|
||||||
|
</java>
|
||||||
|
<ok to="extractDump"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="extractDump">
|
||||||
|
<java>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
|
|
||||||
|
<main-class>eu.dnetlib.dhp.collection.orcid.ExtractORCIDDump</main-class>
|
||||||
|
<java-opts> -Xmx6g </java-opts>
|
||||||
|
<arg>--namenode</arg><arg>${nameNode}</arg>
|
||||||
|
<arg>--sourcePath</arg><arg>${targetPath}</arg>
|
||||||
|
<arg>--targetPath</arg><arg>${targetPath}/extracted</arg>
|
||||||
|
</java>
|
||||||
|
<ok to="generateTables"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="generateTables">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Generate ORCID Tables</name>
|
||||||
|
<class>eu.dnetlib.dhp.collection.orcid.SparkGenerateORCIDTable</class>
|
||||||
|
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.executor.memoryOverhead=2g
|
||||||
|
--conf spark.sql.shuffle.partitions=3000
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${targetPath}/extracted</arg>
|
||||||
|
<arg>--targetPath</arg><arg>${targetPath}/tables</arg>
|
||||||
|
<arg>--master</arg><arg>yarn</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="End"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
<end name="End"/>
|
||||||
|
</workflow-app>
|
|
@ -0,0 +1,21 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"paramName": "n",
|
||||||
|
"paramLongName": "namenode",
|
||||||
|
"paramDescription": "the Name Node URI",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "t",
|
||||||
|
"paramLongName": "targetPath",
|
||||||
|
"paramDescription": "the target PATH where the files are downloaded",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "a",
|
||||||
|
"paramLongName": "apiURL",
|
||||||
|
"paramDescription": "the FIGSHARE API id URL to retrieve all the dump files",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
|
||||||
|
]
|
|
@ -0,0 +1,101 @@
|
||||||
|
package eu.dnetlib.dhp.collection.orcid
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.AbstractScalaApplication
|
||||||
|
import eu.dnetlib.dhp.collection.orcid.model.{Author, Employment, Pid, Work}
|
||||||
|
import org.apache.hadoop.io.Text
|
||||||
|
import org.apache.spark.SparkContext
|
||||||
|
import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
|
||||||
|
import org.slf4j.{Logger, LoggerFactory}
|
||||||
|
|
||||||
|
/** Spark application that reads the ORCID sequence file of (name, xml) pairs and writes
  * three tables under the target path: Authors, Employments and Works.
  */
class SparkGenerateORCIDTable(propertyPath: String, args: Array[String], log: Logger)
    extends AbstractScalaApplication(propertyPath, args, log: Logger) {

  /** Here all the spark applications runs this method
    * where the whole logic of the spark node is defined
    */
  override def run(): Unit = {
    val sourcePath: String = parser.get("sourcePath")
    log.info("found parameters sourcePath: {}", sourcePath)
    val targetPath: String = parser.get("targetPath")
    log.info("found parameters targetPath: {}", targetPath)
    extractORCIDTable(spark, sourcePath, targetPath)
    extractORCIDEmploymentsTable(spark, sourcePath, targetPath)
    extractORCIDWorksTable(spark, sourcePath, targetPath)
  }

  /** Loads the sequence file at sourcePath as a dataset of (key, value) string pairs. */
  private def loadRecords(
    spark: SparkSession,
    sourcePath: String
  ): org.apache.spark.sql.Dataset[(String, String)] = {
    val sc: SparkContext = spark.sparkContext
    import spark.implicits._
    sc
      .sequenceFile(sourcePath, classOf[Text], classOf[Text])
      .map { case (x, y) => (x.toString, y.toString) }
      .toDF
      .as[(String, String)]
  }

  /** Parses the records whose key contains "summaries" and saves them under targetPath/Authors. */
  def extractORCIDTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
    implicit val orcidAuthor: Encoder[Author] = Encoders.bean(classOf[Author])
    loadRecords(spark, sourcePath)
      .filter(r => r._1.contains("summaries"))
      .map { r =>
        val p = new OrcidParser
        p.parseSummary(r._2)
      }
      // the parser returns null for records it cannot parse
      .filter(p => p != null)
      .write
      .mode(SaveMode.Overwrite)
      .save(s"$targetPath/Authors")
  }

  /** Parses the records whose key contains "works" and saves them under targetPath/Works. */
  def extractORCIDWorksTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
    implicit val orcidWorkAuthor: Encoder[Work] = Encoders.bean(classOf[Work])
    loadRecords(spark, sourcePath)
      .filter(r => r._1.contains("works"))
      .map { r =>
        val p = new OrcidParser
        p.parseWork(r._2)
      }
      // the parser returns null for records it cannot parse
      .filter(p => p != null)
      .write
      .mode(SaveMode.Overwrite)
      .save(s"$targetPath/Works")
  }

  /** Parses the records whose key contains "employments" and saves them under targetPath/Employments. */
  def extractORCIDEmploymentsTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
    implicit val orcidEmploymentAuthor: Encoder[Employment] = Encoders.bean(classOf[Employment])
    loadRecords(spark, sourcePath)
      .filter(r => r._1.contains("employments"))
      .map { r =>
        val p = new OrcidParser
        p.parseEmployment(r._2)
      }
      // the parser returns null for records it cannot parse
      .filter(p => p != null)
      .write
      .mode(SaveMode.Overwrite)
      .save(s"$targetPath/Employments")
  }
}
|
||||||
|
|
||||||
|
/** Entry point of the ORCID table generation job. */
object SparkGenerateORCIDTable {

  val log: Logger = LoggerFactory.getLogger(SparkGenerateORCIDTable.getClass)

  /** Builds the application from its parameter specification, initializes it and runs it. */
  def main(args: Array[String]): Unit = {
    val parameterSpec = "/eu/dnetlib/dhp/collection/orcid/generate_orcid_table_parameter.json"
    val application = new SparkGenerateORCIDTable(parameterSpec, args, log)
    application.initialize().run()
  }
}
|
|
@ -176,7 +176,8 @@ object DataciteToOAFTransformation {
|
||||||
vocabularies.getSynonymAsQualifier(
|
vocabularies.getSynonymAsQualifier(
|
||||||
ModelConstants.DNET_RESULT_TYPOLOGIES,
|
ModelConstants.DNET_RESULT_TYPOLOGIES,
|
||||||
typeQualifier.getClassid
|
typeQualifier.getClassid
|
||||||
), resourceType
|
),
|
||||||
|
resourceType
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
if (schemaOrg != null && schemaOrg.nonEmpty) {
|
if (schemaOrg != null && schemaOrg.nonEmpty) {
|
||||||
|
@ -188,7 +189,8 @@ object DataciteToOAFTransformation {
|
||||||
vocabularies.getSynonymAsQualifier(
|
vocabularies.getSynonymAsQualifier(
|
||||||
ModelConstants.DNET_RESULT_TYPOLOGIES,
|
ModelConstants.DNET_RESULT_TYPOLOGIES,
|
||||||
typeQualifier.getClassid
|
typeQualifier.getClassid
|
||||||
), schemaOrg
|
),
|
||||||
|
schemaOrg
|
||||||
)
|
)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -203,7 +205,8 @@ object DataciteToOAFTransformation {
|
||||||
vocabularies.getSynonymAsQualifier(
|
vocabularies.getSynonymAsQualifier(
|
||||||
ModelConstants.DNET_RESULT_TYPOLOGIES,
|
ModelConstants.DNET_RESULT_TYPOLOGIES,
|
||||||
typeQualifier.getClassid
|
typeQualifier.getClassid
|
||||||
), resourceTypeGeneral
|
),
|
||||||
|
resourceTypeGeneral
|
||||||
)
|
)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -228,7 +231,6 @@ object DataciteToOAFTransformation {
|
||||||
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
|
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
|
||||||
i.setInstanceTypeMapping(List(itm).asJava)
|
i.setInstanceTypeMapping(List(itm).asJava)
|
||||||
|
|
||||||
|
|
||||||
typeQualifiers._2.getClassname match {
|
typeQualifiers._2.getClassname match {
|
||||||
case "dataset" =>
|
case "dataset" =>
|
||||||
val r = new OafDataset
|
val r = new OafDataset
|
||||||
|
|
|
@ -593,7 +593,6 @@ object BioDBToOAF {
|
||||||
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
|
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
|
||||||
i.setInstanceTypeMapping(List(itm).asJava)
|
i.setInstanceTypeMapping(List(itm).asJava)
|
||||||
|
|
||||||
|
|
||||||
i.setCollectedfrom(collectedFromMap("ebi"))
|
i.setCollectedfrom(collectedFromMap("ebi"))
|
||||||
d.setInstance(List(i).asJava)
|
d.setInstance(List(i).asJava)
|
||||||
i.setDateofacceptance(
|
i.setDateofacceptance(
|
||||||
|
|
|
@ -205,8 +205,7 @@ object PubMedToOaf {
|
||||||
itm.setOriginalType(i_type.get._1)
|
itm.setOriginalType(i_type.get._1)
|
||||||
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
|
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
|
||||||
pubmedInstance.setInstanceTypeMapping(List(itm).asJava)
|
pubmedInstance.setInstanceTypeMapping(List(itm).asJava)
|
||||||
}
|
} else
|
||||||
else
|
|
||||||
return null
|
return null
|
||||||
}
|
}
|
||||||
val result = createResult(pubmedInstance.getInstancetype, vocabularies)
|
val result = createResult(pubmedInstance.getInstancetype, vocabularies)
|
||||||
|
|
|
@ -0,0 +1,119 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.collection.orcid;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.io.Text;
|
||||||
|
import org.apache.spark.SparkContext;
|
||||||
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.ximpleware.NavException;
|
||||||
|
import com.ximpleware.ParseException;
|
||||||
|
import com.ximpleware.XPathEvalException;
|
||||||
|
import com.ximpleware.XPathParseException;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.collection.orcid.model.Author;
|
||||||
|
import eu.dnetlib.dhp.collection.orcid.model.ORCIDItem;
|
||||||
|
import eu.dnetlib.dhp.parser.utility.VtdException;
|
||||||
|
|
||||||
|
public class DownloadORCIDTest {
|
||||||
|
private final Logger log = LoggerFactory.getLogger(DownloadORCIDTest.class);
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSummary() throws Exception {
|
||||||
|
final String xml = IOUtils
|
||||||
|
.toString(
|
||||||
|
Objects.requireNonNull(getClass().getResourceAsStream("/eu/dnetlib/dhp/collection/orcid/summary.xml")));
|
||||||
|
|
||||||
|
final OrcidParser parser = new OrcidParser();
|
||||||
|
ORCIDItem orcidItem = parser.parseSummary(xml);
|
||||||
|
|
||||||
|
final ObjectMapper mapper = new ObjectMapper();
|
||||||
|
System.out.println(mapper.writeValueAsString(orcidItem));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testParsingWork() throws Exception {
|
||||||
|
|
||||||
|
final List<String> works_path = Arrays
|
||||||
|
.asList(
|
||||||
|
"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-2536-4498.xml",
|
||||||
|
"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-5982-8983.xml",
|
||||||
|
"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191.xml",
|
||||||
|
"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191-similarity.xml",
|
||||||
|
"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191_contributors.xml"
|
||||||
|
|
||||||
|
);
|
||||||
|
|
||||||
|
final OrcidParser parser = new OrcidParser();
|
||||||
|
final ObjectMapper mapper = new ObjectMapper();
|
||||||
|
works_path.stream().map(s -> {
|
||||||
|
try {
|
||||||
|
return IOUtils
|
||||||
|
.toString(
|
||||||
|
Objects
|
||||||
|
.requireNonNull(
|
||||||
|
getClass()
|
||||||
|
.getResourceAsStream(
|
||||||
|
s)));
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}).forEach(s -> {
|
||||||
|
try {
|
||||||
|
System.out.println(mapper.writeValueAsString(parser.parseWork(s)));
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testParsingEmployments() throws Exception {
|
||||||
|
|
||||||
|
final List<String> works_path = Arrays
|
||||||
|
.asList(
|
||||||
|
"/eu/dnetlib/dhp/collection/orcid/employment.xml",
|
||||||
|
"/eu/dnetlib/dhp/collection/orcid/employment_2.xml",
|
||||||
|
"/eu/dnetlib/dhp/collection/orcid/employment_3.xml"
|
||||||
|
|
||||||
|
);
|
||||||
|
|
||||||
|
final OrcidParser parser = new OrcidParser();
|
||||||
|
final ObjectMapper mapper = new ObjectMapper();
|
||||||
|
works_path.stream().map(s -> {
|
||||||
|
try {
|
||||||
|
return IOUtils
|
||||||
|
.toString(
|
||||||
|
Objects
|
||||||
|
.requireNonNull(
|
||||||
|
getClass()
|
||||||
|
.getResourceAsStream(
|
||||||
|
s)));
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}).forEach(s -> {
|
||||||
|
try {
|
||||||
|
System.out.println(mapper.writeValueAsString(parser.parseEmployment(s)));
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,69 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||||
|
<work:work xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history" xmlns:employment="http://www.orcid.org/ns/employment" xmlns:education="http://www.orcid.org/ns/education" xmlns:other-name="http://www.orcid.org/ns/other-name" xmlns:deprecated="http://www.orcid.org/ns/deprecated" xmlns:funding="http://www.orcid.org/ns/funding" xmlns:research-resource="http://www.orcid.org/ns/research-resource" xmlns:service="http://www.orcid.org/ns/service" xmlns:researcher-url="http://www.orcid.org/ns/researcher-url" xmlns:distinction="http://www.orcid.org/ns/distinction" xmlns:internal="http://www.orcid.org/ns/internal" xmlns:membership="http://www.orcid.org/ns/membership" xmlns:person="http://www.orcid.org/ns/person" xmlns:personal-details="http://www.orcid.org/ns/personal-details" xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common" xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword" xmlns:activities="http://www.orcid.org/ns/activities" xmlns:qualification="http://www.orcid.org/ns/qualification" xmlns:external-identifier="http://www.orcid.org/ns/external-identifier" xmlns:error="http://www.orcid.org/ns/error" xmlns:preferences="http://www.orcid.org/ns/preferences" xmlns:invited-position="http://www.orcid.org/ns/invited-position" xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="26448226" path="/0000-0001-5010-5001/work/26448226" visibility="public">
|
||||||
|
<common:created-date>2016-09-01T19:22:46.768Z</common:created-date>
|
||||||
|
<common:last-modified-date>2022-05-25T03:48:56.968Z</common:last-modified-date>
|
||||||
|
<common:source>
|
||||||
|
<common:source-client-id>
|
||||||
|
<common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
|
||||||
|
<common:path>0000-0002-5982-8983</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:source-client-id>
|
||||||
|
<common:source-name>Scopus - Elsevier</common:source-name>
|
||||||
|
<common:assertion-origin-orcid>
|
||||||
|
<common:uri>https://orcid.org/0000-0001-5010-5001</common:uri>
|
||||||
|
<common:path>0000-0001-5010-5001</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:assertion-origin-orcid>
|
||||||
|
<common:assertion-origin-name>Quang Nguyen</common:assertion-origin-name>
|
||||||
|
</common:source>
|
||||||
|
<work:title>
|
||||||
|
<common:title>Vision outcomes and major complications after endovascular coil embolization of ophthalmic segment aneurysms</common:title>
|
||||||
|
</work:title>
|
||||||
|
<work:journal-title>American Journal of Neuroradiology</work:journal-title>
|
||||||
|
<work:citation>
|
||||||
|
<work:citation-type>bibtex</work:citation-type>
|
||||||
|
<work:citation-value>@article{Nguyen2014,title = {Vision outcomes and major complications after endovascular coil embolization of ophthalmic segment aneurysms},journal = {American Journal of Neuroradiology},year = {2014},volume = {35},number = {11},pages = {2140-2145},author = {Durst, C. and Starke, R.M. and Gaughen, J. and Nguyen, Q. and Patrie, J. and Jensen, M.E. and Evans, A.J.}}</work:citation-value>
|
||||||
|
</work:citation>
|
||||||
|
<work:type>journal-article</work:type>
|
||||||
|
<common:publication-date>
|
||||||
|
<common:year>2014</common:year>
|
||||||
|
</common:publication-date>
|
||||||
|
<common:external-ids>
|
||||||
|
<common:external-id>
|
||||||
|
<common:external-id-type>doi</common:external-id-type>
|
||||||
|
<common:external-id-value>10.3174/ajnr.A4032</common:external-id-value>
|
||||||
|
<common:external-id-normalized transient="true">10.3174/ajnr.a4032</common:external-id-normalized>
|
||||||
|
<common:external-id-relationship>self</common:external-id-relationship>
|
||||||
|
</common:external-id>
|
||||||
|
<common:external-id>
|
||||||
|
<common:external-id-type>eid</common:external-id-type>
|
||||||
|
<common:external-id-value>2-s2.0-84911865199</common:external-id-value>
|
||||||
|
<common:external-id-normalized transient="true">2-s2.0-84911865199</common:external-id-normalized>
|
||||||
|
<common:external-id-relationship>self</common:external-id-relationship>
|
||||||
|
</common:external-id>
|
||||||
|
</common:external-ids>
|
||||||
|
<common:url>http://www.scopus.com/inward/record.url?eid=2-s2.0-84911865199&amp;partnerID=MN8TOARS</common:url>
|
||||||
|
<work:contributors>
|
||||||
|
<work:contributor>
|
||||||
|
<work:credit-name>Durst, C.</work:credit-name>
|
||||||
|
</work:contributor>
|
||||||
|
<work:contributor>
|
||||||
|
<work:credit-name>Starke, R.M.</work:credit-name>
|
||||||
|
</work:contributor>
|
||||||
|
<work:contributor>
|
||||||
|
<work:credit-name>Gaughen, J.</work:credit-name>
|
||||||
|
</work:contributor>
|
||||||
|
<work:contributor>
|
||||||
|
<work:credit-name>Nguyen, Q.</work:credit-name>
|
||||||
|
</work:contributor>
|
||||||
|
<work:contributor>
|
||||||
|
<work:credit-name>Patrie, J.</work:credit-name>
|
||||||
|
</work:contributor>
|
||||||
|
<work:contributor>
|
||||||
|
<work:credit-name>Jensen, M.E.</work:credit-name>
|
||||||
|
</work:contributor>
|
||||||
|
<work:contributor>
|
||||||
|
<work:credit-name>Evans, A.J.</work:credit-name>
|
||||||
|
</work:contributor>
|
||||||
|
</work:contributors>
|
||||||
|
</work:work>
|
|
@ -0,0 +1,79 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||||
|
<work:work xmlns:address="http://www.orcid.org/ns/address"
|
||||||
|
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
|
||||||
|
xmlns:employment="http://www.orcid.org/ns/employment"
|
||||||
|
xmlns:education="http://www.orcid.org/ns/education"
|
||||||
|
xmlns:other-name="http://www.orcid.org/ns/other-name"
|
||||||
|
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
|
||||||
|
xmlns:funding="http://www.orcid.org/ns/funding"
|
||||||
|
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
|
||||||
|
xmlns:service="http://www.orcid.org/ns/service"
|
||||||
|
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
|
||||||
|
xmlns:distinction="http://www.orcid.org/ns/distinction"
|
||||||
|
xmlns:internal="http://www.orcid.org/ns/internal"
|
||||||
|
xmlns:membership="http://www.orcid.org/ns/membership"
|
||||||
|
xmlns:person="http://www.orcid.org/ns/person"
|
||||||
|
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
|
||||||
|
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
|
||||||
|
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
|
||||||
|
xmlns:activities="http://www.orcid.org/ns/activities"
|
||||||
|
xmlns:qualification="http://www.orcid.org/ns/qualification"
|
||||||
|
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
|
||||||
|
xmlns:error="http://www.orcid.org/ns/error"
|
||||||
|
xmlns:preferences="http://www.orcid.org/ns/preferences"
|
||||||
|
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
|
||||||
|
xmlns:work="http://www.orcid.org/ns/work"
|
||||||
|
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="50101152"
|
||||||
|
path="/0000-0001-5349-4030/work/50101152" visibility="public">
|
||||||
|
<common:created-date>2018-11-01T19:49:45.562Z</common:created-date>
|
||||||
|
<common:last-modified-date>2018-11-01T19:49:45.562Z</common:last-modified-date>
|
||||||
|
<common:source>
|
||||||
|
<common:source-client-id>
|
||||||
|
<common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
|
||||||
|
<common:path>0000-0002-5982-8983</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:source-client-id>
|
||||||
|
<common:source-name>Scopus - Elsevier</common:source-name>
|
||||||
|
</common:source>
|
||||||
|
<work:title>
|
||||||
|
<common:title>"Calling Out" in class: Degrees of candor in addressing social injustices in
|
||||||
|
racially homogenous and heterogeneous U.S. history classrooms</common:title>
|
||||||
|
</work:title>
|
||||||
|
<work:journal-title>Journal of Social Studies Research</work:journal-title>
|
||||||
|
<work:citation>
|
||||||
|
<work:citation-type>bibtex</work:citation-type>
|
||||||
|
<work:citation-value>@article{Massaro2018,title = {{"}Calling Out{"} in class: Degrees of
|
||||||
|
candor in addressing social injustices in racially homogenous and heterogeneous U.S.
|
||||||
|
history classrooms},journal = {Journal of Social Studies Research},year = {2018},author
|
||||||
|
= {Parkhouse, H. and Massaro, V.R.}}</work:citation-value>
|
||||||
|
</work:citation>
|
||||||
|
<work:type>journal-article</work:type>
|
||||||
|
<common:publication-date>
|
||||||
|
<common:year>2018</common:year>
|
||||||
|
</common:publication-date>
|
||||||
|
<common:external-ids>
|
||||||
|
<common:external-id>
|
||||||
|
<common:external-id-type>doi</common:external-id-type>
|
||||||
|
<common:external-id-value>10.1016/j.jssr.2018.01.004</common:external-id-value>
|
||||||
|
<common:external-id-normalized transient="true"
|
||||||
|
>10.1016/j.jssr.2018.01.004</common:external-id-normalized>
|
||||||
|
<common:external-id-relationship>self</common:external-id-relationship>
|
||||||
|
</common:external-id>
|
||||||
|
<common:external-id>
|
||||||
|
<common:external-id-type>eid</common:external-id-type>
|
||||||
|
<common:external-id-value>2-s2.0-85041949043</common:external-id-value>
|
||||||
|
<common:external-id-normalized transient="true"
|
||||||
|
>2-s2.0-85041949043</common:external-id-normalized>
|
||||||
|
<common:external-id-relationship>self</common:external-id-relationship>
|
||||||
|
</common:external-id>
|
||||||
|
</common:external-ids>
|
||||||
|
<common:url>http://www.scopus.com/inward/record.url?eid=2-s2.0-85041949043&amp;partnerID=MN8TOARS</common:url>
|
||||||
|
<work:contributors>
|
||||||
|
<work:contributor>
|
||||||
|
<work:credit-name>Parkhouse, H.</work:credit-name>
|
||||||
|
</work:contributor>
|
||||||
|
<work:contributor>
|
||||||
|
<work:credit-name>Massaro, V.R.</work:credit-name>
|
||||||
|
</work:contributor>
|
||||||
|
</work:contributors>
|
||||||
|
</work:work>
|
|
@ -0,0 +1,113 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||||
|
<work:work xmlns:address="http://www.orcid.org/ns/address"
|
||||||
|
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
|
||||||
|
xmlns:employment="http://www.orcid.org/ns/employment"
|
||||||
|
xmlns:education="http://www.orcid.org/ns/education"
|
||||||
|
xmlns:other-name="http://www.orcid.org/ns/other-name"
|
||||||
|
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
|
||||||
|
xmlns:funding="http://www.orcid.org/ns/funding"
|
||||||
|
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
|
||||||
|
xmlns:service="http://www.orcid.org/ns/service"
|
||||||
|
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
|
||||||
|
xmlns:distinction="http://www.orcid.org/ns/distinction"
|
||||||
|
xmlns:internal="http://www.orcid.org/ns/internal"
|
||||||
|
xmlns:membership="http://www.orcid.org/ns/membership"
|
||||||
|
xmlns:person="http://www.orcid.org/ns/person"
|
||||||
|
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
|
||||||
|
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
|
||||||
|
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
|
||||||
|
xmlns:activities="http://www.orcid.org/ns/activities"
|
||||||
|
xmlns:qualification="http://www.orcid.org/ns/qualification"
|
||||||
|
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
|
||||||
|
xmlns:error="http://www.orcid.org/ns/error"
|
||||||
|
xmlns:preferences="http://www.orcid.org/ns/preferences"
|
||||||
|
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
|
||||||
|
xmlns:work="http://www.orcid.org/ns/work"
|
||||||
|
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
|
||||||
|
path="/0000-0003-2760-1191/work/28776099" visibility="public">
|
||||||
|
<common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
|
||||||
|
<common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
|
||||||
|
<common:source>
|
||||||
|
<common:source-orcid>
|
||||||
|
<common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
|
||||||
|
<common:path>0000-0002-9157-3431</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:source-orcid>
|
||||||
|
<common:source-name>Europe PubMed Central</common:source-name>
|
||||||
|
</common:source>
|
||||||
|
<work:title>
|
||||||
|
<common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
|
||||||
|
Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
|
||||||
|
ST-Segment-Elevation Myocardial Infarction.</common:title>
|
||||||
|
</work:title>
|
||||||
|
<work:citation>
|
||||||
|
<work:citation-type>formatted-unspecified</work:citation-type>
|
||||||
|
<work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
|
||||||
|
Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
|
||||||
|
</work:citation>
|
||||||
|
<work:type>journal-article</work:type>
|
||||||
|
<common:publication-date>
|
||||||
|
<common:year>2016</common:year>
|
||||||
|
<common:month>11</common:month>
|
||||||
|
</common:publication-date>
|
||||||
|
<common:external-ids>
|
||||||
|
<common:external-id>
|
||||||
|
<common:external-id-type>pmid</common:external-id-type>
|
||||||
|
<common:external-id-value>27899851</common:external-id-value>
|
||||||
|
<common:external-id-normalized transient="true">27899851</common:external-id-normalized>
|
||||||
|
<common:external-id-relationship>self</common:external-id-relationship>
|
||||||
|
</common:external-id>
|
||||||
|
<common:external-id>
|
||||||
|
<common:external-id-type>pmc</common:external-id-type>
|
||||||
|
<common:external-id-value>PMC5126442</common:external-id-value>
|
||||||
|
<common:external-id-normalized transient="true"
|
||||||
|
>PMC5126442</common:external-id-normalized>
|
||||||
|
<common:external-id-relationship>self</common:external-id-relationship>
|
||||||
|
</common:external-id>
|
||||||
|
</common:external-ids>
|
||||||
|
<common:url>http://europepmc.org/abstract/med/27899851</common:url>
|
||||||
|
<work:contributors>
|
||||||
|
<work:contributor>
|
||||||
|
<work:credit-name>Abdel-Dayem K</work:credit-name>
|
||||||
|
<work:contributor-attributes>
|
||||||
|
<work:contributor-sequence>first</work:contributor-sequence>
|
||||||
|
<work:contributor-role>author</work:contributor-role>
|
||||||
|
</work:contributor-attributes>
|
||||||
|
</work:contributor>
|
||||||
|
<work:contributor>
|
||||||
|
<work:credit-name>Abdel-Dayem Fake</work:credit-name>
|
||||||
|
<work:contributor-attributes>
|
||||||
|
<work:contributor-sequence>first</work:contributor-sequence>
|
||||||
|
<work:contributor-role>author</work:contributor-role>
|
||||||
|
</work:contributor-attributes>
|
||||||
|
</work:contributor>
|
||||||
|
<work:contributor>
|
||||||
|
<work:credit-name>Eweda II</work:credit-name>
|
||||||
|
<work:contributor-attributes>
|
||||||
|
<work:contributor-sequence>first</work:contributor-sequence>
|
||||||
|
<work:contributor-role>author</work:contributor-role>
|
||||||
|
</work:contributor-attributes>
|
||||||
|
</work:contributor>
|
||||||
|
<work:contributor>
|
||||||
|
<work:credit-name>El-Sherbiny A</work:credit-name>
|
||||||
|
<work:contributor-attributes>
|
||||||
|
<work:contributor-sequence>first</work:contributor-sequence>
|
||||||
|
<work:contributor-role>author</work:contributor-role>
|
||||||
|
</work:contributor-attributes>
|
||||||
|
</work:contributor>
|
||||||
|
<work:contributor>
|
||||||
|
<work:credit-name>Dimitry MO</work:credit-name>
|
||||||
|
<work:contributor-attributes>
|
||||||
|
<work:contributor-sequence>first</work:contributor-sequence>
|
||||||
|
<work:contributor-role>author</work:contributor-role>
|
||||||
|
</work:contributor-attributes>
|
||||||
|
</work:contributor>
|
||||||
|
<work:contributor>
|
||||||
|
<work:credit-name>Nammas W</work:credit-name>
|
||||||
|
<work:contributor-attributes>
|
||||||
|
<work:contributor-sequence>first</work:contributor-sequence>
|
||||||
|
<work:contributor-role>author</work:contributor-role>
|
||||||
|
</work:contributor-attributes>
|
||||||
|
</work:contributor>
|
||||||
|
</work:contributors>
|
||||||
|
</work:work>
|
|
@ -0,0 +1,106 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||||
|
<work:work xmlns:address="http://www.orcid.org/ns/address"
|
||||||
|
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
|
||||||
|
xmlns:employment="http://www.orcid.org/ns/employment"
|
||||||
|
xmlns:education="http://www.orcid.org/ns/education"
|
||||||
|
xmlns:other-name="http://www.orcid.org/ns/other-name"
|
||||||
|
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
|
||||||
|
xmlns:funding="http://www.orcid.org/ns/funding"
|
||||||
|
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
|
||||||
|
xmlns:service="http://www.orcid.org/ns/service"
|
||||||
|
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
|
||||||
|
xmlns:distinction="http://www.orcid.org/ns/distinction"
|
||||||
|
xmlns:internal="http://www.orcid.org/ns/internal"
|
||||||
|
xmlns:membership="http://www.orcid.org/ns/membership"
|
||||||
|
xmlns:person="http://www.orcid.org/ns/person"
|
||||||
|
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
|
||||||
|
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
|
||||||
|
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
|
||||||
|
xmlns:activities="http://www.orcid.org/ns/activities"
|
||||||
|
xmlns:qualification="http://www.orcid.org/ns/qualification"
|
||||||
|
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
|
||||||
|
xmlns:error="http://www.orcid.org/ns/error"
|
||||||
|
xmlns:preferences="http://www.orcid.org/ns/preferences"
|
||||||
|
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
|
||||||
|
xmlns:work="http://www.orcid.org/ns/work"
|
||||||
|
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
|
||||||
|
path="/0000-0003-2760-1191/work/28776099" visibility="public">
|
||||||
|
<common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
|
||||||
|
<common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
|
||||||
|
<common:source>
|
||||||
|
<common:source-orcid>
|
||||||
|
<common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
|
||||||
|
<common:path>0000-0002-9157-3431</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:source-orcid>
|
||||||
|
<common:source-name>Europe PubMed Central</common:source-name>
|
||||||
|
</common:source>
|
||||||
|
<work:title>
|
||||||
|
<common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
|
||||||
|
Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
|
||||||
|
ST-Segment-Elevation Myocardial Infarction.</common:title>
|
||||||
|
</work:title>
|
||||||
|
<work:citation>
|
||||||
|
<work:citation-type>formatted-unspecified</work:citation-type>
|
||||||
|
<work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
|
||||||
|
Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
|
||||||
|
</work:citation>
|
||||||
|
<work:type>journal-article</work:type>
|
||||||
|
<common:publication-date>
|
||||||
|
<common:year>2016</common:year>
|
||||||
|
<common:month>11</common:month>
|
||||||
|
</common:publication-date>
|
||||||
|
<common:external-ids>
|
||||||
|
<common:external-id>
|
||||||
|
<common:external-id-type>pmid</common:external-id-type>
|
||||||
|
<common:external-id-value>27899851</common:external-id-value>
|
||||||
|
<common:external-id-normalized transient="true">27899851</common:external-id-normalized>
|
||||||
|
<common:external-id-relationship>self</common:external-id-relationship>
|
||||||
|
</common:external-id>
|
||||||
|
<common:external-id>
|
||||||
|
<common:external-id-type>pmc</common:external-id-type>
|
||||||
|
<common:external-id-value>PMC5126442</common:external-id-value>
|
||||||
|
<common:external-id-normalized transient="true"
|
||||||
|
>PMC5126442</common:external-id-normalized>
|
||||||
|
<common:external-id-relationship>self</common:external-id-relationship>
|
||||||
|
</common:external-id>
|
||||||
|
</common:external-ids>
|
||||||
|
<common:url>http://europepmc.org/abstract/med/27899851</common:url>
|
||||||
|
<work:contributors>
|
||||||
|
<work:contributor>
|
||||||
|
<work:credit-name>Khair Abde Daye</work:credit-name>
|
||||||
|
<work:contributor-attributes>
|
||||||
|
<work:contributor-sequence>first</work:contributor-sequence>
|
||||||
|
<work:contributor-role>author</work:contributor-role>
|
||||||
|
</work:contributor-attributes>
|
||||||
|
</work:contributor>
|
||||||
|
<work:contributor>
|
||||||
|
<work:credit-name>Eweda II</work:credit-name>
|
||||||
|
<work:contributor-attributes>
|
||||||
|
<work:contributor-sequence>first</work:contributor-sequence>
|
||||||
|
<work:contributor-role>author</work:contributor-role>
|
||||||
|
</work:contributor-attributes>
|
||||||
|
</work:contributor>
|
||||||
|
<work:contributor>
|
||||||
|
<work:credit-name>El-Sherbiny A</work:credit-name>
|
||||||
|
<work:contributor-attributes>
|
||||||
|
<work:contributor-sequence>first</work:contributor-sequence>
|
||||||
|
<work:contributor-role>author</work:contributor-role>
|
||||||
|
</work:contributor-attributes>
|
||||||
|
</work:contributor>
|
||||||
|
<work:contributor>
|
||||||
|
<work:credit-name>Dimitry MO</work:credit-name>
|
||||||
|
<work:contributor-attributes>
|
||||||
|
<work:contributor-sequence>first</work:contributor-sequence>
|
||||||
|
<work:contributor-role>author</work:contributor-role>
|
||||||
|
</work:contributor-attributes>
|
||||||
|
</work:contributor>
|
||||||
|
<work:contributor>
|
||||||
|
<work:credit-name>Nammas W</work:credit-name>
|
||||||
|
<work:contributor-attributes>
|
||||||
|
<work:contributor-sequence>first</work:contributor-sequence>
|
||||||
|
<work:contributor-role>author</work:contributor-role>
|
||||||
|
</work:contributor-attributes>
|
||||||
|
</work:contributor>
|
||||||
|
</work:contributors>
|
||||||
|
</work:work>
|
|
@ -0,0 +1,101 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||||
|
<work:work xmlns:address="http://www.orcid.org/ns/address"
|
||||||
|
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
|
||||||
|
xmlns:employment="http://www.orcid.org/ns/employment"
|
||||||
|
xmlns:education="http://www.orcid.org/ns/education"
|
||||||
|
xmlns:other-name="http://www.orcid.org/ns/other-name"
|
||||||
|
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
|
||||||
|
xmlns:funding="http://www.orcid.org/ns/funding"
|
||||||
|
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
|
||||||
|
xmlns:service="http://www.orcid.org/ns/service"
|
||||||
|
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
|
||||||
|
xmlns:distinction="http://www.orcid.org/ns/distinction"
|
||||||
|
xmlns:internal="http://www.orcid.org/ns/internal"
|
||||||
|
xmlns:membership="http://www.orcid.org/ns/membership"
|
||||||
|
xmlns:person="http://www.orcid.org/ns/person"
|
||||||
|
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
|
||||||
|
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
|
||||||
|
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
|
||||||
|
xmlns:activities="http://www.orcid.org/ns/activities"
|
||||||
|
xmlns:qualification="http://www.orcid.org/ns/qualification"
|
||||||
|
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
|
||||||
|
xmlns:error="http://www.orcid.org/ns/error"
|
||||||
|
xmlns:preferences="http://www.orcid.org/ns/preferences"
|
||||||
|
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
|
||||||
|
xmlns:work="http://www.orcid.org/ns/work"
|
||||||
|
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
|
||||||
|
path="/0000-0003-2760-1191/work/28776099" visibility="public">
|
||||||
|
<common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
|
||||||
|
<common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
|
||||||
|
<common:source>
|
||||||
|
<common:source-orcid>
|
||||||
|
<common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
|
||||||
|
<common:path>0000-0002-9157-3431</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:source-orcid>
|
||||||
|
<common:source-name>Europe PubMed Central</common:source-name>
|
||||||
|
</common:source>
|
||||||
|
<work:title>
|
||||||
|
<common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
|
||||||
|
Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
|
||||||
|
ST-Segment-Elevation Myocardial Infarction.</common:title>
|
||||||
|
</work:title>
|
||||||
|
<work:citation>
|
||||||
|
<work:citation-type>formatted-unspecified</work:citation-type>
|
||||||
|
<work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
|
||||||
|
Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
|
||||||
|
</work:citation>
|
||||||
|
<work:type>journal-article</work:type>
|
||||||
|
<common:publication-date>
|
||||||
|
<common:year>2016</common:year>
|
||||||
|
<common:month>11</common:month>
|
||||||
|
</common:publication-date>
|
||||||
|
<common:external-ids>
|
||||||
|
<common:external-id>
|
||||||
|
<common:external-id-type>pmid</common:external-id-type>
|
||||||
|
<common:external-id-value>27899851</common:external-id-value>
|
||||||
|
<common:external-id-normalized transient="true">27899851</common:external-id-normalized>
|
||||||
|
<common:external-id-relationship>self</common:external-id-relationship>
|
||||||
|
</common:external-id>
|
||||||
|
<common:external-id>
|
||||||
|
<common:external-id-type>pmc</common:external-id-type>
|
||||||
|
<common:external-id-value>PMC5126442</common:external-id-value>
|
||||||
|
<common:external-id-normalized transient="true"
|
||||||
|
>PMC5126442</common:external-id-normalized>
|
||||||
|
<common:external-id-relationship>self</common:external-id-relationship>
|
||||||
|
</common:external-id>
|
||||||
|
</common:external-ids>
|
||||||
|
<common:url>http://europepmc.org/abstract/med/27899851</common:url>
|
||||||
|
<work:contributors>
|
||||||
|
<work:contributor>
|
||||||
|
<work:contributor-attributes>
|
||||||
|
<work:contributor-sequence>seq0</work:contributor-sequence>
|
||||||
|
<work:contributor-role>role0</work:contributor-role>
|
||||||
|
</work:contributor-attributes>
|
||||||
|
</work:contributor>
|
||||||
|
<work:contributor>
|
||||||
|
<work:credit-name>creditname1</work:credit-name>
|
||||||
|
</work:contributor>
|
||||||
|
<work:contributor>
|
||||||
|
<work:credit-name>creditname2</work:credit-name>
|
||||||
|
<work:contributor-attributes>
|
||||||
|
<work:contributor-sequence>seq2</work:contributor-sequence>
|
||||||
|
<work:contributor-role></work:contributor-role>
|
||||||
|
</work:contributor-attributes>
|
||||||
|
</work:contributor>
|
||||||
|
<work:contributor>
|
||||||
|
<work:credit-name>creditname3</work:credit-name>
|
||||||
|
<work:contributor-attributes>
|
||||||
|
<work:contributor-sequence></work:contributor-sequence>
|
||||||
|
<work:contributor-role>role3</work:contributor-role>
|
||||||
|
</work:contributor-attributes>
|
||||||
|
</work:contributor>
|
||||||
|
<work:contributor>
|
||||||
|
<work:credit-name></work:credit-name>
|
||||||
|
<work:contributor-attributes>
|
||||||
|
<work:contributor-sequence>seq4</work:contributor-sequence>
|
||||||
|
<work:contributor-role>role4</work:contributor-role>
|
||||||
|
</work:contributor-attributes>
|
||||||
|
</work:contributor>
|
||||||
|
</work:contributors>
|
||||||
|
</work:work>
|
|
@ -0,0 +1,50 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||||
|
<employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
|
||||||
|
xmlns:history="http://www.orcid.org/ns/history"
|
||||||
|
xmlns:employment="http://www.orcid.org/ns/employment"
|
||||||
|
xmlns:education="http://www.orcid.org/ns/education"
|
||||||
|
xmlns:other-name="http://www.orcid.org/ns/other-name"
|
||||||
|
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
|
||||||
|
xmlns:funding="http://www.orcid.org/ns/funding"
|
||||||
|
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
|
||||||
|
xmlns:service="http://www.orcid.org/ns/service"
|
||||||
|
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
|
||||||
|
xmlns:distinction="http://www.orcid.org/ns/distinction"
|
||||||
|
xmlns:internal="http://www.orcid.org/ns/internal"
|
||||||
|
xmlns:membership="http://www.orcid.org/ns/membership"
|
||||||
|
xmlns:person="http://www.orcid.org/ns/person"
|
||||||
|
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
|
||||||
|
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
|
||||||
|
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
|
||||||
|
xmlns:activities="http://www.orcid.org/ns/activities"
|
||||||
|
xmlns:qualification="http://www.orcid.org/ns/qualification"
|
||||||
|
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
|
||||||
|
xmlns:error="http://www.orcid.org/ns/error"
|
||||||
|
xmlns:preferences="http://www.orcid.org/ns/preferences"
|
||||||
|
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
|
||||||
|
xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
|
||||||
|
put-code="2205087" path="/0000-0001-5010-5001/employment/2205087" display-index="0"
|
||||||
|
visibility="public">
|
||||||
|
<common:created-date>2016-09-01T19:21:05.791Z</common:created-date>
|
||||||
|
<common:last-modified-date>2016-09-01T19:21:05.791Z</common:last-modified-date>
|
||||||
|
<common:source>
|
||||||
|
<common:source-orcid>
|
||||||
|
<common:uri>https://orcid.org/0000-0001-5010-5001</common:uri>
|
||||||
|
<common:path>0000-0001-5010-5001</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:source-orcid>
|
||||||
|
<common:source-name>Quang Nguyen</common:source-name>
|
||||||
|
</common:source>
|
||||||
|
<common:organization>
|
||||||
|
<common:name>Beth Israel Deaconess Medical Center</common:name>
|
||||||
|
<common:address>
|
||||||
|
<common:city>Boston</common:city>
|
||||||
|
<common:region>MA</common:region>
|
||||||
|
<common:country>US</common:country>
|
||||||
|
</common:address>
|
||||||
|
<common:disambiguated-organization>
|
||||||
|
<common:disambiguated-organization-identifier>1859</common:disambiguated-organization-identifier>
|
||||||
|
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||||
|
</common:disambiguated-organization>
|
||||||
|
</common:organization>
|
||||||
|
</employment:employment>
|
|
@ -0,0 +1,55 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||||
|
<employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
|
||||||
|
xmlns:history="http://www.orcid.org/ns/history"
|
||||||
|
xmlns:employment="http://www.orcid.org/ns/employment"
|
||||||
|
xmlns:education="http://www.orcid.org/ns/education"
|
||||||
|
xmlns:other-name="http://www.orcid.org/ns/other-name"
|
||||||
|
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
|
||||||
|
xmlns:funding="http://www.orcid.org/ns/funding"
|
||||||
|
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
|
||||||
|
xmlns:service="http://www.orcid.org/ns/service"
|
||||||
|
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
|
||||||
|
xmlns:distinction="http://www.orcid.org/ns/distinction"
|
||||||
|
xmlns:internal="http://www.orcid.org/ns/internal"
|
||||||
|
xmlns:membership="http://www.orcid.org/ns/membership"
|
||||||
|
xmlns:person="http://www.orcid.org/ns/person"
|
||||||
|
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
|
||||||
|
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
|
||||||
|
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
|
||||||
|
xmlns:activities="http://www.orcid.org/ns/activities"
|
||||||
|
xmlns:qualification="http://www.orcid.org/ns/qualification"
|
||||||
|
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
|
||||||
|
xmlns:error="http://www.orcid.org/ns/error"
|
||||||
|
xmlns:preferences="http://www.orcid.org/ns/preferences"
|
||||||
|
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
|
||||||
|
xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
|
||||||
|
put-code="6364960" path="/0000-0001-5011-3001/employment/6364960" display-index="1"
|
||||||
|
visibility="public">
|
||||||
|
<common:created-date>2018-09-03T01:46:19.474Z</common:created-date>
|
||||||
|
<common:last-modified-date>2018-09-03T01:46:19.474Z</common:last-modified-date>
|
||||||
|
<common:source>
|
||||||
|
<common:source-orcid>
|
||||||
|
<common:uri>https://orcid.org/0000-0001-5011-3001</common:uri>
|
||||||
|
<common:path>0000-0001-5011-3001</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:source-orcid>
|
||||||
|
<common:source-name>zhengyan li</common:source-name>
|
||||||
|
</common:source>
|
||||||
|
<common:start-date>
|
||||||
|
<common:year>2008</common:year>
|
||||||
|
<common:month>09</common:month>
|
||||||
|
<common:day>01</common:day>
|
||||||
|
</common:start-date>
|
||||||
|
<common:organization>
|
||||||
|
<common:name>Anhui Academy of Agricultural Sciences</common:name>
|
||||||
|
<common:address>
|
||||||
|
<common:city>Hefei</common:city>
|
||||||
|
<common:region>Anhui</common:region>
|
||||||
|
<common:country>CN</common:country>
|
||||||
|
</common:address>
|
||||||
|
<common:disambiguated-organization>
|
||||||
|
<common:disambiguated-organization-identifier>125385</common:disambiguated-organization-identifier>
|
||||||
|
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||||
|
</common:disambiguated-organization>
|
||||||
|
</common:organization>
|
||||||
|
</employment:employment>
|
|
@ -0,0 +1,62 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||||
|
<employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
|
||||||
|
xmlns:history="http://www.orcid.org/ns/history"
|
||||||
|
xmlns:employment="http://www.orcid.org/ns/employment"
|
||||||
|
xmlns:education="http://www.orcid.org/ns/education"
|
||||||
|
xmlns:other-name="http://www.orcid.org/ns/other-name"
|
||||||
|
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
|
||||||
|
xmlns:funding="http://www.orcid.org/ns/funding"
|
||||||
|
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
|
||||||
|
xmlns:service="http://www.orcid.org/ns/service"
|
||||||
|
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
|
||||||
|
xmlns:distinction="http://www.orcid.org/ns/distinction"
|
||||||
|
xmlns:internal="http://www.orcid.org/ns/internal"
|
||||||
|
xmlns:membership="http://www.orcid.org/ns/membership"
|
||||||
|
xmlns:person="http://www.orcid.org/ns/person"
|
||||||
|
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
|
||||||
|
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
|
||||||
|
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
|
||||||
|
xmlns:activities="http://www.orcid.org/ns/activities"
|
||||||
|
xmlns:qualification="http://www.orcid.org/ns/qualification"
|
||||||
|
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
|
||||||
|
xmlns:error="http://www.orcid.org/ns/error"
|
||||||
|
xmlns:preferences="http://www.orcid.org/ns/preferences"
|
||||||
|
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
|
||||||
|
xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
|
||||||
|
put-code="7210424" path="/0000-0001-5022-8001/employment/7210424" display-index="1"
|
||||||
|
visibility="public">
|
||||||
|
<common:created-date>2021-03-11T14:48:29.603Z</common:created-date>
|
||||||
|
<common:last-modified-date>2021-03-11T14:48:29.603Z</common:last-modified-date>
|
||||||
|
<common:source>
|
||||||
|
<common:source-orcid>
|
||||||
|
<common:uri>https://orcid.org/0000-0001-5012-1001</common:uri>
|
||||||
|
<common:path>0000-0001-5012-1001</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:source-orcid>
|
||||||
|
<common:source-name>Asma Bazzi</common:source-name>
|
||||||
|
</common:source>
|
||||||
|
<common:department-name>Pathology and Laboratory Medicine</common:department-name>
|
||||||
|
<common:role-title>Medical Laboratory Technologist</common:role-title>
|
||||||
|
<common:start-date>
|
||||||
|
<common:year>1994</common:year>
|
||||||
|
<common:month>10</common:month>
|
||||||
|
<common:day>01</common:day>
|
||||||
|
</common:start-date>
|
||||||
|
<common:end-date>
|
||||||
|
<common:year>2000</common:year>
|
||||||
|
<common:month>06</common:month>
|
||||||
|
<common:day>30</common:day>
|
||||||
|
</common:end-date>
|
||||||
|
<common:organization>
|
||||||
|
<common:name>American University of Beirut</common:name>
|
||||||
|
<common:address>
|
||||||
|
<common:city>Hamra</common:city>
|
||||||
|
<common:region>Beirut</common:region>
|
||||||
|
<common:country>LB</common:country>
|
||||||
|
</common:address>
|
||||||
|
<common:disambiguated-organization>
|
||||||
|
<common:disambiguated-organization-identifier>11238</common:disambiguated-organization-identifier>
|
||||||
|
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||||
|
</common:disambiguated-organization>
|
||||||
|
</common:organization>
|
||||||
|
</employment:employment>
|
|
@ -0,0 +1,581 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||||
|
<record:record xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history" xmlns:employment="http://www.orcid.org/ns/employment" xmlns:education="http://www.orcid.org/ns/education" xmlns:other-name="http://www.orcid.org/ns/other-name" xmlns:deprecated="http://www.orcid.org/ns/deprecated" xmlns:funding="http://www.orcid.org/ns/funding" xmlns:research-resource="http://www.orcid.org/ns/research-resource" xmlns:service="http://www.orcid.org/ns/service" xmlns:researcher-url="http://www.orcid.org/ns/researcher-url" xmlns:distinction="http://www.orcid.org/ns/distinction" xmlns:internal="http://www.orcid.org/ns/internal" xmlns:membership="http://www.orcid.org/ns/membership" xmlns:person="http://www.orcid.org/ns/person" xmlns:personal-details="http://www.orcid.org/ns/personal-details" xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common" xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword" xmlns:activities="http://www.orcid.org/ns/activities" xmlns:qualification="http://www.orcid.org/ns/qualification" xmlns:external-identifier="http://www.orcid.org/ns/external-identifier" xmlns:error="http://www.orcid.org/ns/error" xmlns:preferences="http://www.orcid.org/ns/preferences" xmlns:invited-position="http://www.orcid.org/ns/invited-position" xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review" path="/0000-0001-5045-1000">
|
||||||
|
<common:orcid-identifier>
|
||||||
|
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||||
|
<common:path>0000-0001-5045-1000</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:orcid-identifier>
|
||||||
|
<preferences:preferences>
|
||||||
|
<preferences:locale>es</preferences:locale>
|
||||||
|
</preferences:preferences>
|
||||||
|
<history:history>
|
||||||
|
<history:creation-method>Direct</history:creation-method>
|
||||||
|
<history:submission-date>2023-01-17T23:50:40.215Z</history:submission-date>
|
||||||
|
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
|
||||||
|
<history:claimed>true</history:claimed>
|
||||||
|
<history:verified-email>true</history:verified-email>
|
||||||
|
<history:verified-primary-email>true</history:verified-primary-email>
|
||||||
|
</history:history>
|
||||||
|
<person:person path="/0000-0001-5045-1000/person">
|
||||||
|
<person:name visibility="public" path="0000-0001-5045-1000">
|
||||||
|
<common:created-date>2023-01-17T23:50:40.472Z</common:created-date>
|
||||||
|
<common:last-modified-date>2023-01-17T23:50:40.472Z</common:last-modified-date>
|
||||||
|
<personal-details:given-names>Patricio</personal-details:given-names>
|
||||||
|
<personal-details:family-name>Sánchez Quinchuela</personal-details:family-name>
|
||||||
|
</person:name>
|
||||||
|
<other-name:other-names path="/0000-0001-5045-1000/other-names"/>
|
||||||
|
<person:biography visibility="public" path="/0000-0001-5045-1000/biography">
|
||||||
|
<common:created-date>2023-01-19T13:47:33.653Z</common:created-date>
|
||||||
|
<common:last-modified-date>2023-01-19T13:47:33.653Z</common:last-modified-date>
|
||||||
|
<personal-details:content>Especialista de vinculación con la sociedad y docente de la Universidad de las Artes. Magister en Economía Social y Solidaria por el IAEN; Magister en Proyectos Sociales y Productivos por la UNACH. Licenciado en Artes UCE. Licenciado en Castellano y Literatura por la UNACH. Doctorando del programa de Sociología de la UNED España. Larga trayectoria vinculado a las organizaciones sociales acompañando procesos de gestión cultural, formación de liderazgos y economía solidaria.</personal-details:content>
|
||||||
|
</person:biography>
|
||||||
|
<researcher-url:researcher-urls path="/0000-0001-5045-1000/researcher-urls"/>
|
||||||
|
<email:emails path="/0000-0001-5045-1000/email"/>
|
||||||
|
<address:addresses path="/0000-0001-5045-1000/address"/>
|
||||||
|
<keyword:keywords path="/0000-0001-5045-1000/keywords"/>
|
||||||
|
<external-identifier:external-identifiers path="/0000-0001-7291-3210/external-identifiers">
|
||||||
|
<common:last-modified-date>2018-02-05T23:27:36.636Z</common:last-modified-date>
|
||||||
|
<external-identifier:external-identifier put-code="134902" visibility="public" path="/0000-0001-7291-3210/external-identifiers/134902" display-index="1">
|
||||||
|
<common:created-date>2013-03-08T03:20:39.347Z</common:created-date>
|
||||||
|
<common:last-modified-date>2018-02-05T23:27:36.636Z</common:last-modified-date>
|
||||||
|
<common:source>
|
||||||
|
<common:source-client-id>
|
||||||
|
<common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
|
||||||
|
<common:path>0000-0002-5982-8983</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:source-client-id>
|
||||||
|
<common:source-name>Scopus - Elsevier</common:source-name>
|
||||||
|
<common:assertion-origin-orcid>
|
||||||
|
<common:uri>https://orcid.org/0000-0001-7291-3210</common:uri>
|
||||||
|
<common:path>0000-0001-7291-3210</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:assertion-origin-orcid>
|
||||||
|
<common:assertion-origin-name>Paolo Manghi</common:assertion-origin-name>
|
||||||
|
</common:source>
|
||||||
|
<common:external-id-type>Scopus Author ID</common:external-id-type>
|
||||||
|
<common:external-id-value>6602255248</common:external-id-value>
|
||||||
|
<common:external-id-url>http://www.scopus.com/inward/authorDetails.url?authorID=6602255248&partnerID=MN8TOARS</common:external-id-url>
|
||||||
|
<common:external-id-relationship>self</common:external-id-relationship>
|
||||||
|
</external-identifier:external-identifier>
|
||||||
|
</external-identifier:external-identifiers>
|
||||||
|
</person:person>
|
||||||
|
<activities:activities-summary path="/0000-0001-5045-1000/activities">
|
||||||
|
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
|
||||||
|
<activities:distinctions path="/0000-0001-5045-1000/distinctions">
|
||||||
|
<common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
|
||||||
|
<activities:affiliation-group>
|
||||||
|
<common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
|
||||||
|
<common:external-ids/>
|
||||||
|
<distinction:distinction-summary put-code="19395146" display-index="1" path="/0000-0001-5045-1000/distinction/19395146" visibility="public">
|
||||||
|
<common:created-date>2023-01-19T13:49:48.482Z</common:created-date>
|
||||||
|
<common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
|
||||||
|
<common:source>
|
||||||
|
<common:source-orcid>
|
||||||
|
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||||
|
<common:path>0000-0001-5045-1000</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:source-orcid>
|
||||||
|
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||||
|
</common:source>
|
||||||
|
<common:department-name>Programa de Maestría</common:department-name>
|
||||||
|
<common:role-title>Becario del programa de Maestría en Economía Social y Solidaria</common:role-title>
|
||||||
|
<common:start-date>
|
||||||
|
<common:year>2014</common:year>
|
||||||
|
<common:month>10</common:month>
|
||||||
|
<common:day>20</common:day>
|
||||||
|
</common:start-date>
|
||||||
|
<common:organization>
|
||||||
|
<common:name>Instituto de Altos Estudios Nacionales</common:name>
|
||||||
|
<common:address>
|
||||||
|
<common:city>Quito</common:city>
|
||||||
|
<common:region>Pichincha</common:region>
|
||||||
|
<common:country>EC</common:country>
|
||||||
|
</common:address>
|
||||||
|
<common:disambiguated-organization>
|
||||||
|
<common:disambiguated-organization-identifier>https://ror.org/011g3me54</common:disambiguated-organization-identifier>
|
||||||
|
<common:disambiguation-source>ROR</common:disambiguation-source>
|
||||||
|
</common:disambiguated-organization>
|
||||||
|
</common:organization>
|
||||||
|
</distinction:distinction-summary>
|
||||||
|
</activities:affiliation-group>
|
||||||
|
</activities:distinctions>
|
||||||
|
<activities:educations path="/0000-0001-5045-1000/educations">
|
||||||
|
<common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
|
||||||
|
<activities:affiliation-group>
|
||||||
|
<common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
|
||||||
|
<common:external-ids/>
|
||||||
|
<education:education-summary put-code="19389331" display-index="1" path="/0000-0001-5045-1000/education/19389331" visibility="public">
|
||||||
|
<common:created-date>2023-01-18T21:41:03.175Z</common:created-date>
|
||||||
|
<common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
|
||||||
|
<common:source>
|
||||||
|
<common:source-orcid>
|
||||||
|
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||||
|
<common:path>0000-0001-5045-1000</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:source-orcid>
|
||||||
|
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||||
|
</common:source>
|
||||||
|
<common:department-name>Programa de Doctorado en Sociología</common:department-name>
|
||||||
|
<common:role-title>Doctorando del Programa de Sociología</common:role-title>
|
||||||
|
<common:start-date>
|
||||||
|
<common:year>2020</common:year>
|
||||||
|
<common:month>11</common:month>
|
||||||
|
<common:day>06</common:day>
|
||||||
|
</common:start-date>
|
||||||
|
<common:organization>
|
||||||
|
<common:name>Universidad Nacional de Educación a Distancia Facultad de Ciencias Políticas y Sociología</common:name>
|
||||||
|
<common:address>
|
||||||
|
<common:city>Madrid</common:city>
|
||||||
|
<common:region>Comunidad de Madrid</common:region>
|
||||||
|
<common:country>ES</common:country>
|
||||||
|
</common:address>
|
||||||
|
<common:disambiguated-organization>
|
||||||
|
<common:disambiguated-organization-identifier>223339</common:disambiguated-organization-identifier>
|
||||||
|
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||||
|
</common:disambiguated-organization>
|
||||||
|
</common:organization>
|
||||||
|
</education:education-summary>
|
||||||
|
</activities:affiliation-group>
|
||||||
|
</activities:educations>
|
||||||
|
<activities:employments path="/0000-0001-5045-1000/employments">
|
||||||
|
<common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
|
||||||
|
<activities:affiliation-group>
|
||||||
|
<common:last-modified-date>2023-01-18T21:22:21.513Z</common:last-modified-date>
|
||||||
|
<common:external-ids/>
|
||||||
|
<employment:employment-summary put-code="19379757" display-index="1" path="/0000-0001-5045-1000/employment/19379757" visibility="public">
|
||||||
|
<common:created-date>2023-01-17T23:57:08.246Z</common:created-date>
|
||||||
|
<common:last-modified-date>2023-01-18T21:22:21.513Z</common:last-modified-date>
|
||||||
|
<common:source>
|
||||||
|
<common:source-orcid>
|
||||||
|
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||||
|
<common:path>0000-0001-5045-1000</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:source-orcid>
|
||||||
|
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||||
|
</common:source>
|
||||||
|
<common:department-name>Dirección de Vinculación con la Sociedad</common:department-name>
|
||||||
|
<common:role-title>Especialista de Proyectos y docente</common:role-title>
|
||||||
|
<common:start-date>
|
||||||
|
<common:year>2021</common:year>
|
||||||
|
<common:month>11</common:month>
|
||||||
|
<common:day>01</common:day>
|
||||||
|
</common:start-date>
|
||||||
|
<common:organization>
|
||||||
|
<common:name>Universidad de las Artes</common:name>
|
||||||
|
<common:address>
|
||||||
|
<common:city>Guayaquil</common:city>
|
||||||
|
<common:region>Guayas</common:region>
|
||||||
|
<common:country>EC</common:country>
|
||||||
|
</common:address>
|
||||||
|
<common:disambiguated-organization>
|
||||||
|
<common:disambiguated-organization-identifier>https://ror.org/016drwn73</common:disambiguated-organization-identifier>
|
||||||
|
<common:disambiguation-source>ROR</common:disambiguation-source>
|
||||||
|
</common:disambiguated-organization>
|
||||||
|
</common:organization>
|
||||||
|
</employment:employment-summary>
|
||||||
|
</activities:affiliation-group>
|
||||||
|
<activities:affiliation-group>
|
||||||
|
<common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
|
||||||
|
<common:external-ids/>
|
||||||
|
<employment:employment-summary put-code="19389234" display-index="1" path="/0000-0001-5045-1000/employment/19389234" visibility="public">
|
||||||
|
<common:created-date>2023-01-18T21:25:07.138Z</common:created-date>
|
||||||
|
<common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
|
||||||
|
<common:source>
|
||||||
|
<common:source-orcid>
|
||||||
|
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||||
|
<common:path>0000-0001-5045-1000</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:source-orcid>
|
||||||
|
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||||
|
</common:source>
|
||||||
|
<common:department-name>Dirección de Vinculación con la Sociedad</common:department-name>
|
||||||
|
<common:role-title>Director</common:role-title>
|
||||||
|
<common:start-date>
|
||||||
|
<common:year>2019</common:year>
|
||||||
|
<common:month>11</common:month>
|
||||||
|
<common:day>05</common:day>
|
||||||
|
</common:start-date>
|
||||||
|
<common:end-date>
|
||||||
|
<common:year>2021</common:year>
|
||||||
|
<common:month>10</common:month>
|
||||||
|
<common:day>31</common:day>
|
||||||
|
</common:end-date>
|
||||||
|
<common:organization>
|
||||||
|
<common:name>Universidad Regional Amazónica IKIAM</common:name>
|
||||||
|
<common:address>
|
||||||
|
<common:city>Tena</common:city>
|
||||||
|
<common:region>Napo</common:region>
|
||||||
|
<common:country>EC</common:country>
|
||||||
|
</common:address>
|
||||||
|
<common:disambiguated-organization>
|
||||||
|
<common:disambiguated-organization-identifier>https://ror.org/05xedqd83</common:disambiguated-organization-identifier>
|
||||||
|
<common:disambiguation-source>ROR</common:disambiguation-source>
|
||||||
|
</common:disambiguated-organization>
|
||||||
|
</common:organization>
|
||||||
|
<common:url>http://ikiam.edu.ec</common:url>
|
||||||
|
</employment:employment-summary>
|
||||||
|
</activities:affiliation-group>
|
||||||
|
</activities:employments>
|
||||||
|
<activities:fundings path="/0000-0001-5045-1000/fundings"/>
|
||||||
|
<activities:invited-positions path="/0000-0001-5045-1000/invited-positions"/>
|
||||||
|
<activities:memberships path="/0000-0001-5045-1000/memberships">
|
||||||
|
<common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
|
||||||
|
<activities:affiliation-group>
|
||||||
|
<common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
|
||||||
|
<common:external-ids/>
|
||||||
|
<membership:membership-summary put-code="19927715" display-index="1" path="/0000-0001-5045-1000/membership/19927715" visibility="public">
|
||||||
|
<common:created-date>2023-03-24T18:16:09.131Z</common:created-date>
|
||||||
|
<common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
|
||||||
|
<common:source>
|
||||||
|
<common:source-orcid>
|
||||||
|
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||||
|
<common:path>0000-0001-5045-1000</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:source-orcid>
|
||||||
|
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||||
|
</common:source>
|
||||||
|
<common:department-name>Artes Escénicas</common:department-name>
|
||||||
|
<common:role-title>Miembro</common:role-title>
|
||||||
|
<common:start-date>
|
||||||
|
<common:year>2000</common:year>
|
||||||
|
<common:month>07</common:month>
|
||||||
|
<common:day>15</common:day>
|
||||||
|
</common:start-date>
|
||||||
|
<common:organization>
|
||||||
|
<common:name>Casa de la Cultura Ecuatoriana</common:name>
|
||||||
|
<common:address>
|
||||||
|
<common:city>Riobamba</common:city>
|
||||||
|
<common:region>Sierra Centro</common:region>
|
||||||
|
<common:country>EC</common:country>
|
||||||
|
</common:address>
|
||||||
|
</common:organization>
|
||||||
|
</membership:membership-summary>
|
||||||
|
</activities:affiliation-group>
|
||||||
|
</activities:memberships>
|
||||||
|
<activities:peer-reviews path="/0000-0001-5045-1000/peer-reviews"/>
|
||||||
|
<activities:qualifications path="/0000-0001-5045-1000/qualifications">
|
||||||
|
<common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
|
||||||
|
<activities:affiliation-group>
|
||||||
|
<common:last-modified-date>2023-01-18T21:29:11.300Z</common:last-modified-date>
|
||||||
|
<common:external-ids/>
|
||||||
|
<qualification:qualification-summary put-code="19389264" display-index="1" path="/0000-0001-5045-1000/qualification/19389264" visibility="public">
|
||||||
|
<common:created-date>2023-01-18T21:29:11.300Z</common:created-date>
|
||||||
|
<common:last-modified-date>2023-01-18T21:29:11.300Z</common:last-modified-date>
|
||||||
|
<common:source>
|
||||||
|
<common:source-orcid>
|
||||||
|
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||||
|
<common:path>0000-0001-5045-1000</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:source-orcid>
|
||||||
|
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||||
|
</common:source>
|
||||||
|
<common:department-name>Programa de Gobernabilidad</common:department-name>
|
||||||
|
<common:role-title>Magister en Economïa Social y Solidaria</common:role-title>
|
||||||
|
<common:start-date>
|
||||||
|
<common:year>2014</common:year>
|
||||||
|
<common:month>10</common:month>
|
||||||
|
<common:day>20</common:day>
|
||||||
|
</common:start-date>
|
||||||
|
<common:end-date>
|
||||||
|
<common:year>2017</common:year>
|
||||||
|
<common:month>01</common:month>
|
||||||
|
<common:day>26</common:day>
|
||||||
|
</common:end-date>
|
||||||
|
<common:organization>
|
||||||
|
<common:name>Instituto de Altos Estudios Nacionales</common:name>
|
||||||
|
<common:address>
|
||||||
|
<common:city>Quito</common:city>
|
||||||
|
<common:region>Pichincha</common:region>
|
||||||
|
<common:country>EC</common:country>
|
||||||
|
</common:address>
|
||||||
|
<common:disambiguated-organization>
|
||||||
|
<common:disambiguated-organization-identifier>https://ror.org/011g3me54</common:disambiguated-organization-identifier>
|
||||||
|
<common:disambiguation-source>ROR</common:disambiguation-source>
|
||||||
|
</common:disambiguated-organization>
|
||||||
|
</common:organization>
|
||||||
|
</qualification:qualification-summary>
|
||||||
|
</activities:affiliation-group>
|
||||||
|
<activities:affiliation-group>
|
||||||
|
<common:last-modified-date>2023-01-18T21:34:32.093Z</common:last-modified-date>
|
||||||
|
<common:external-ids/>
|
||||||
|
<qualification:qualification-summary put-code="19389298" display-index="1" path="/0000-0001-5045-1000/qualification/19389298" visibility="public">
|
||||||
|
<common:created-date>2023-01-18T21:34:32.093Z</common:created-date>
|
||||||
|
<common:last-modified-date>2023-01-18T21:34:32.093Z</common:last-modified-date>
|
||||||
|
<common:source>
|
||||||
|
<common:source-orcid>
|
||||||
|
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||||
|
<common:path>0000-0001-5045-1000</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:source-orcid>
|
||||||
|
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||||
|
</common:source>
|
||||||
|
<common:department-name>Posgrados</common:department-name>
|
||||||
|
<common:role-title>Magister en Proyectos Sociales y Productivos</common:role-title>
|
||||||
|
<common:start-date>
|
||||||
|
<common:year>2001</common:year>
|
||||||
|
<common:month>03</common:month>
|
||||||
|
<common:day>09</common:day>
|
||||||
|
</common:start-date>
|
||||||
|
<common:end-date>
|
||||||
|
<common:year>2003</common:year>
|
||||||
|
<common:month>02</common:month>
|
||||||
|
<common:day>27</common:day>
|
||||||
|
</common:end-date>
|
||||||
|
<common:organization>
|
||||||
|
<common:name>Universidad Nacional de Chimborazo</common:name>
|
||||||
|
<common:address>
|
||||||
|
<common:city>Riobamba</common:city>
|
||||||
|
<common:region>Chimborazo</common:region>
|
||||||
|
<common:country>EC</common:country>
|
||||||
|
</common:address>
|
||||||
|
<common:disambiguated-organization>
|
||||||
|
<common:disambiguated-organization-identifier>https://ror.org/059wmd288</common:disambiguated-organization-identifier>
|
||||||
|
<common:disambiguation-source>ROR</common:disambiguation-source>
|
||||||
|
</common:disambiguated-organization>
|
||||||
|
</common:organization>
|
||||||
|
</qualification:qualification-summary>
|
||||||
|
</activities:affiliation-group>
|
||||||
|
<activities:affiliation-group>
|
||||||
|
<common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
|
||||||
|
<common:external-ids/>
|
||||||
|
<qualification:qualification-summary put-code="19389353" display-index="1" path="/0000-0001-5045-1000/qualification/19389353" visibility="public">
|
||||||
|
<common:created-date>2023-01-18T21:45:07.379Z</common:created-date>
|
||||||
|
<common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
|
||||||
|
<common:source>
|
||||||
|
<common:source-orcid>
|
||||||
|
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||||
|
<common:path>0000-0001-5045-1000</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:source-orcid>
|
||||||
|
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||||
|
</common:source>
|
||||||
|
<common:department-name>Ciencias de la Educación</common:department-name>
|
||||||
|
<common:role-title>Licenciado en Ciencias de la Educación en Castellano y Literatura</common:role-title>
|
||||||
|
<common:start-date>
|
||||||
|
<common:year>1994</common:year>
|
||||||
|
<common:month>10</common:month>
|
||||||
|
<common:day>03</common:day>
|
||||||
|
</common:start-date>
|
||||||
|
<common:end-date>
|
||||||
|
<common:year>2000</common:year>
|
||||||
|
<common:month>01</common:month>
|
||||||
|
<common:day>31</common:day>
|
||||||
|
</common:end-date>
|
||||||
|
<common:organization>
|
||||||
|
<common:name>Universidad Nacional de Chimborazo</common:name>
|
||||||
|
<common:address>
|
||||||
|
<common:city>Riobamba</common:city>
|
||||||
|
<common:region>Chimborazo</common:region>
|
||||||
|
<common:country>EC</common:country>
|
||||||
|
</common:address>
|
||||||
|
<common:disambiguated-organization>
|
||||||
|
<common:disambiguated-organization-identifier>https://ror.org/059wmd288</common:disambiguated-organization-identifier>
|
||||||
|
<common:disambiguation-source>ROR</common:disambiguation-source>
|
||||||
|
</common:disambiguated-organization>
|
||||||
|
</common:organization>
|
||||||
|
</qualification:qualification-summary>
|
||||||
|
</activities:affiliation-group>
|
||||||
|
<activities:affiliation-group>
|
||||||
|
<common:last-modified-date>2023-01-18T21:37:42.186Z</common:last-modified-date>
|
||||||
|
<common:external-ids/>
|
||||||
|
<qualification:qualification-summary put-code="19389317" display-index="1" path="/0000-0001-5045-1000/qualification/19389317" visibility="public">
|
||||||
|
<common:created-date>2023-01-18T21:37:42.186Z</common:created-date>
|
||||||
|
<common:last-modified-date>2023-01-18T21:37:42.186Z</common:last-modified-date>
|
||||||
|
<common:source>
|
||||||
|
<common:source-orcid>
|
||||||
|
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||||
|
<common:path>0000-0001-5045-1000</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:source-orcid>
|
||||||
|
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||||
|
</common:source>
|
||||||
|
<common:department-name>Facultad de Artes</common:department-name>
|
||||||
|
<common:role-title>Licenciado en Artes</common:role-title>
|
||||||
|
<common:start-date>
|
||||||
|
<common:year>1989</common:year>
|
||||||
|
<common:month>09</common:month>
|
||||||
|
<common:day>05</common:day>
|
||||||
|
</common:start-date>
|
||||||
|
<common:end-date>
|
||||||
|
<common:year>1997</common:year>
|
||||||
|
<common:month>08</common:month>
|
||||||
|
<common:day>07</common:day>
|
||||||
|
</common:end-date>
|
||||||
|
<common:organization>
|
||||||
|
<common:name>Universidad Central del Ecuador</common:name>
|
||||||
|
<common:address>
|
||||||
|
<common:city>Quito</common:city>
|
||||||
|
<common:region>Pichincha</common:region>
|
||||||
|
<common:country>EC</common:country>
|
||||||
|
</common:address>
|
||||||
|
<common:disambiguated-organization>
|
||||||
|
<common:disambiguated-organization-identifier>http://dx.doi.org/10.13039/100019134</common:disambiguated-organization-identifier>
|
||||||
|
<common:disambiguation-source>FUNDREF</common:disambiguation-source>
|
||||||
|
</common:disambiguated-organization>
|
||||||
|
</common:organization>
|
||||||
|
</qualification:qualification-summary>
|
||||||
|
</activities:affiliation-group>
|
||||||
|
</activities:qualifications>
|
||||||
|
<activities:research-resources path="/0000-0001-5045-1000/research-resources"/>
|
||||||
|
<activities:services path="/0000-0001-5045-1000/services"/>
|
||||||
|
<activities:works path="/0000-0001-5045-1000/works">
|
||||||
|
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
|
||||||
|
<activities:group>
|
||||||
|
<common:last-modified-date>2023-06-09T22:15:12.910Z</common:last-modified-date>
|
||||||
|
<common:external-ids/>
|
||||||
|
<work:work-summary put-code="131526645" path="/0000-0001-5045-1000/work/131526645" visibility="public" display-index="1">
|
||||||
|
<common:created-date>2023-03-24T18:36:56.180Z</common:created-date>
|
||||||
|
<common:last-modified-date>2023-06-09T22:15:12.910Z</common:last-modified-date>
|
||||||
|
<common:source>
|
||||||
|
<common:source-orcid>
|
||||||
|
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||||
|
<common:path>0000-0001-5045-1000</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:source-orcid>
|
||||||
|
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||||
|
</common:source>
|
||||||
|
<work:title>
|
||||||
|
<common:title>Experience in a non-capitalist way: solidarity funds that do not tax interest on the use of money</common:title>
|
||||||
|
</work:title>
|
||||||
|
<common:external-ids>
|
||||||
|
<common:external-id>
|
||||||
|
<common:external-id-type>isbn</common:external-id-type>
|
||||||
|
<common:external-id-value>978-9942-29-089-2</common:external-id-value>
|
||||||
|
<common:external-id-normalized transient="true">9789942290892</common:external-id-normalized>
|
||||||
|
<common:external-id-relationship>part-of</common:external-id-relationship>
|
||||||
|
</common:external-id>
|
||||||
|
</common:external-ids>
|
||||||
|
<work:type>book-chapter</work:type>
|
||||||
|
<common:publication-date>
|
||||||
|
<common:year>2023</common:year>
|
||||||
|
<common:month>06</common:month>
|
||||||
|
<common:day>07</common:day>
|
||||||
|
</common:publication-date>
|
||||||
|
<work:journal-title>Finanzas éticas y solidarias en América Latina: diagnósticos, debates y propuestas</work:journal-title>
|
||||||
|
</work:work-summary>
|
||||||
|
</activities:group>
|
||||||
|
<activities:group>
|
||||||
|
<common:last-modified-date>2023-03-24T19:05:36.384Z</common:last-modified-date>
|
||||||
|
<common:external-ids/>
|
||||||
|
<work:work-summary put-code="131527819" path="/0000-0001-5045-1000/work/131527819" visibility="public" display-index="1">
|
||||||
|
<common:created-date>2023-03-24T19:05:36.384Z</common:created-date>
|
||||||
|
<common:last-modified-date>2023-03-24T19:05:36.384Z</common:last-modified-date>
|
||||||
|
<common:source>
|
||||||
|
<common:source-orcid>
|
||||||
|
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||||
|
<common:path>0000-0001-5045-1000</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:source-orcid>
|
||||||
|
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||||
|
</common:source>
|
||||||
|
<work:title>
|
||||||
|
<common:title>Incidence of artistic practices in the social transformation of the territory. study of case: Hilarte Association, Guayaquil-Ecuador</common:title>
|
||||||
|
</work:title>
|
||||||
|
<common:external-ids/>
|
||||||
|
<work:type>conference-abstract</work:type>
|
||||||
|
<common:publication-date>
|
||||||
|
<common:year>2022</common:year>
|
||||||
|
<common:month>10</common:month>
|
||||||
|
<common:day>06</common:day>
|
||||||
|
</common:publication-date>
|
||||||
|
</work:work-summary>
|
||||||
|
</activities:group>
|
||||||
|
<activities:group>
|
||||||
|
<common:last-modified-date>2023-09-04T17:40:30.215Z</common:last-modified-date>
|
||||||
|
<common:external-ids>
|
||||||
|
<common:external-id>
|
||||||
|
<common:external-id-type>other-id</common:external-id-type>
|
||||||
|
<common:external-id-value>2018</common:external-id-value>
|
||||||
|
<common:external-id-normalized transient="true">2018</common:external-id-normalized>
|
||||||
|
<common:external-id-relationship>self</common:external-id-relationship>
|
||||||
|
</common:external-id>
|
||||||
|
</common:external-ids>
|
||||||
|
<work:work-summary put-code="141716337" path="/0000-0001-5045-1000/work/141716337" visibility="public" display-index="1">
|
||||||
|
<common:created-date>2023-09-04T17:40:30.215Z</common:created-date>
|
||||||
|
<common:last-modified-date>2023-09-04T17:40:30.215Z</common:last-modified-date>
|
||||||
|
<common:source>
|
||||||
|
<common:source-orcid>
|
||||||
|
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||||
|
<common:path>0000-0001-5045-1000</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:source-orcid>
|
||||||
|
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||||
|
</common:source>
|
||||||
|
<work:title>
|
||||||
|
<common:title>Más allá de la transferencia de conocimientos, un espacio para el interaprendizaje y el diálogo de saberes</common:title>
|
||||||
|
</work:title>
|
||||||
|
<common:external-ids>
|
||||||
|
<common:external-id>
|
||||||
|
<common:external-id-type>other-id</common:external-id-type>
|
||||||
|
<common:external-id-value>2018</common:external-id-value>
|
||||||
|
<common:external-id-normalized transient="true">2018</common:external-id-normalized>
|
||||||
|
<common:external-id-relationship>self</common:external-id-relationship>
|
||||||
|
</common:external-id>
|
||||||
|
</common:external-ids>
|
||||||
|
<common:url>https://drive.google.com/drive/folders/1Tclz6isxGzSjTq-hfTnxe6M1nux-88wF?usp=drive_link</common:url>
|
||||||
|
<work:type>conference-poster</work:type>
|
||||||
|
<common:publication-date>
|
||||||
|
<common:year>2018</common:year>
|
||||||
|
<common:month>11</common:month>
|
||||||
|
<common:day>30</common:day>
|
||||||
|
</common:publication-date>
|
||||||
|
<work:journal-title>Más allá de la transferencia de conocimientos, un espacio para el interaprendizaje y el diálogo de saberes</work:journal-title>
|
||||||
|
</work:work-summary>
|
||||||
|
</activities:group>
|
||||||
|
<activities:group>
|
||||||
|
<common:last-modified-date>2023-03-24T18:57:10.095Z</common:last-modified-date>
|
||||||
|
<common:external-ids/>
|
||||||
|
<work:work-summary put-code="131527433" path="/0000-0001-5045-1000/work/131527433" visibility="public" display-index="1">
|
||||||
|
<common:created-date>2023-03-24T18:57:10.095Z</common:created-date>
|
||||||
|
<common:last-modified-date>2023-03-24T18:57:10.095Z</common:last-modified-date>
|
||||||
|
<common:source>
|
||||||
|
<common:source-orcid>
|
||||||
|
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||||
|
<common:path>0000-0001-5045-1000</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:source-orcid>
|
||||||
|
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||||
|
</common:source>
|
||||||
|
<work:title>
|
||||||
|
<common:title>Promotion of the popular and solidarity economy from the state: principles and challenges in the experience of Ecuador</common:title>
|
||||||
|
</work:title>
|
||||||
|
<common:external-ids/>
|
||||||
|
<work:type>dissertation-thesis</work:type>
|
||||||
|
<common:publication-date>
|
||||||
|
<common:year>2017</common:year>
|
||||||
|
<common:month>01</common:month>
|
||||||
|
<common:day>26</common:day>
|
||||||
|
</common:publication-date>
|
||||||
|
</work:work-summary>
|
||||||
|
</activities:group>
|
||||||
|
<activities:group>
|
||||||
|
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
|
||||||
|
<common:external-ids/>
|
||||||
|
<work:work-summary put-code="141716713" path="/0000-0001-5045-1000/work/141716713" visibility="public" display-index="1">
|
||||||
|
<common:created-date>2023-09-04T17:51:57.749Z</common:created-date>
|
||||||
|
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
|
||||||
|
<common:source>
|
||||||
|
<common:source-orcid>
|
||||||
|
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||||
|
<common:path>0000-0001-5045-1000</common:path>
|
||||||
|
<common:host>orcid.org</common:host>
|
||||||
|
</common:source-orcid>
|
||||||
|
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||||
|
</common:source>
|
||||||
|
<work:title>
|
||||||
|
<common:title>La Rebelión de los Dioses</common:title>
|
||||||
|
</work:title>
|
||||||
|
<common:external-ids/>
|
||||||
|
<common:url>https://drive.google.com/drive/folders/1Tclz6isxGzSjTq-hfTnxe6M1nux-88wF?usp=drive_link</common:url>
|
||||||
|
<work:type>registered-copyright</work:type>
|
||||||
|
<common:publication-date>
|
||||||
|
<common:year>2001</common:year>
|
||||||
|
<common:month>08</common:month>
|
||||||
|
<common:day>28</common:day>
|
||||||
|
</common:publication-date>
|
||||||
|
<work:journal-title>Editorial pedagógica freire</work:journal-title>
|
||||||
|
</work:work-summary>
|
||||||
|
</activities:group>
|
||||||
|
</activities:works>
|
||||||
|
</activities:activities-summary>
|
||||||
|
</record:record>
|
|
@ -133,32 +133,6 @@
|
||||||
<arg>--targetPath</arg><arg>${inputPathMAG}/dataset</arg>
|
<arg>--targetPath</arg><arg>${inputPathMAG}/dataset</arg>
|
||||||
<arg>--master</arg><arg>yarn-cluster</arg>
|
<arg>--master</arg><arg>yarn-cluster</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="PreProcessORCID"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<!-- ORCID SECTION -->
|
|
||||||
<action name="PreProcessORCID">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn-cluster</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>Convert ORCID to Dataset</name>
|
|
||||||
<class>eu.dnetlib.doiboost.orcid.SparkPreprocessORCID</class>
|
|
||||||
<jar>dhp-doiboost-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.sql.shuffle.partitions=3840
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--sourcePath</arg><arg>${inputPathOrcid}</arg>
|
|
||||||
<arg>--workingPath</arg><arg>${workingPathOrcid}</arg>
|
|
||||||
<arg>--master</arg><arg>yarn-cluster</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
|
@ -59,10 +59,10 @@
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<!-- ORCID Parameters -->
|
<!-- ORCID Parameters -->
|
||||||
<property>
|
<!-- <property>-->
|
||||||
<name>workingPathOrcid</name>
|
<!-- <name>workingPathOrcid</name>-->
|
||||||
<description>the ORCID working path</description>
|
<!-- <description>the ORCID working path</description>-->
|
||||||
</property>
|
<!-- </property>-->
|
||||||
|
|
||||||
</parameters>
|
</parameters>
|
||||||
|
|
||||||
|
@ -170,32 +170,6 @@
|
||||||
<arg>--targetPath</arg><arg>${workingPath}/uwPublication</arg>
|
<arg>--targetPath</arg><arg>${workingPath}/uwPublication</arg>
|
||||||
<arg>--master</arg><arg>yarn-cluster</arg>
|
<arg>--master</arg><arg>yarn-cluster</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="ProcessORCID"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<!-- ORCID SECTION -->
|
|
||||||
<action name="ProcessORCID">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn-cluster</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>Convert ORCID to Dataset</name>
|
|
||||||
<class>eu.dnetlib.doiboost.orcid.SparkConvertORCIDToOAF</class>
|
|
||||||
<jar>dhp-doiboost-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.sql.shuffle.partitions=3840
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--workingPath</arg><arg>${workingPathOrcid}</arg>
|
|
||||||
<arg>--targetPath</arg><arg>${workingPath}/orcidPublication</arg>
|
|
||||||
<arg>--master</arg><arg>yarn-cluster</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="CreateDOIBoost"/>
|
<ok to="CreateDOIBoost"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
|
@ -66,7 +66,7 @@ object SparkGenerateDoiBoost {
|
||||||
Encoders.tuple(Encoders.STRING, mapEncoderPub)
|
Encoders.tuple(Encoders.STRING, mapEncoderPub)
|
||||||
implicit val mapEncoderRel: Encoder[Relation] = Encoders.kryo[Relation]
|
implicit val mapEncoderRel: Encoder[Relation] = Encoders.kryo[Relation]
|
||||||
|
|
||||||
logger.info("Phase 2) Join Crossref with UnpayWall")
|
logger.info("Phase 1) Join Crossref with UnpayWall")
|
||||||
|
|
||||||
val crossrefPublication: Dataset[(String, Publication)] =
|
val crossrefPublication: Dataset[(String, Publication)] =
|
||||||
spark.read.load(s"$workingDirPath/crossrefPublication").as[Publication].map(p => (p.getId, p))
|
spark.read.load(s"$workingDirPath/crossrefPublication").as[Publication].map(p => (p.getId, p))
|
||||||
|
@ -91,20 +91,10 @@ object SparkGenerateDoiBoost {
|
||||||
.write
|
.write
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.save(s"$workingDirPath/firstJoin")
|
.save(s"$workingDirPath/firstJoin")
|
||||||
logger.info("Phase 3) Join Result with ORCID")
|
|
||||||
val fj: Dataset[(String, Publication)] =
|
|
||||||
spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p))
|
|
||||||
val orcidPublication: Dataset[(String, Publication)] =
|
|
||||||
spark.read.load(s"$workingDirPath/orcidPublication").as[Publication].map(p => (p.getId, p))
|
|
||||||
fj.joinWith(orcidPublication, fj("_1").equalTo(orcidPublication("_1")), "left")
|
|
||||||
.map(applyMerge)
|
|
||||||
.write
|
|
||||||
.mode(SaveMode.Overwrite)
|
|
||||||
.save(s"$workingDirPath/secondJoin")
|
|
||||||
|
|
||||||
logger.info("Phase 4) Join Result with MAG")
|
logger.info("Phase 2) Join Result with MAG")
|
||||||
val sj: Dataset[(String, Publication)] =
|
val sj: Dataset[(String, Publication)] =
|
||||||
spark.read.load(s"$workingDirPath/secondJoin").as[Publication].map(p => (p.getId, p))
|
spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p))
|
||||||
|
|
||||||
val magPublication: Dataset[(String, Publication)] =
|
val magPublication: Dataset[(String, Publication)] =
|
||||||
spark.read.load(s"$workingDirPath/magPublication").as[Publication].map(p => (p.getId, p))
|
spark.read.load(s"$workingDirPath/magPublication").as[Publication].map(p => (p.getId, p))
|
||||||
|
|
|
@ -372,7 +372,7 @@ case object Crossref2Oaf {
|
||||||
objectType,
|
objectType,
|
||||||
mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type")
|
mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type")
|
||||||
)
|
)
|
||||||
mappingResult(result, json, cOBJCategory, originalType)
|
mappingResult(result, json, cOBJCategory, objectSubType)
|
||||||
if (result == null || result.getId == null)
|
if (result == null || result.getId == null)
|
||||||
return List()
|
return List()
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"paramName": "mt",
|
||||||
|
"paramLongName": "master",
|
||||||
|
"paramDescription": "should be local or yarn",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "op",
|
||||||
|
"paramLongName": "orcidPath",
|
||||||
|
"paramDescription": "the path of the orcid Table generated by the dump",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "gp",
|
||||||
|
"paramLongName": "graphPath",
|
||||||
|
"paramDescription": "the path of the graph we want to apply enrichment",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "tp",
|
||||||
|
"paramLongName": "targetPath",
|
||||||
|
"paramDescription": "the output path of the graph enriched",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
]
|
|
@ -0,0 +1,34 @@
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>jobTracker</name>
|
||||||
|
<value>yarnRM</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>nameNode</name>
|
||||||
|
<value>hdfs://nameservice1</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.use.system.libpath</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.action.sharelib.for.spark</name>
|
||||||
|
<value>spark2</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveMetastoreUris</name>
|
||||||
|
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveJdbcUrl</name>
|
||||||
|
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveDbName</name>
|
||||||
|
<value>openaire</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
|
@ -0,0 +1,88 @@
|
||||||
|
<workflow-app name="Enrich_graph_with_ORCID_Workflow" xmlns="uri:oozie:workflow:0.5">
|
||||||
|
<parameters>
|
||||||
|
<property>
|
||||||
|
<name>orcidPath</name>
|
||||||
|
<description>the path of the orcid Table generated by the dump</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>graphPath</name>
|
||||||
|
<description>the path of the graph we want to apply enrichment</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>targetPath</name>
|
||||||
|
<description>the output path of the graph enriched</description>
|
||||||
|
</property>
|
||||||
|
</parameters>
|
||||||
|
|
||||||
|
<start to="EnrichGraph"/>
|
||||||
|
|
||||||
|
<kill name="Kill">
|
||||||
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
|
</kill>
|
||||||
|
|
||||||
|
<action name="EnrichGraph">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Enrich Graph with ORCID</name>
|
||||||
|
<class>eu.dnetlib.dhp.enrich.orcid.SparkEnrichGraphWithOrcidAuthors</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.executor.memoryOverhead=2g
|
||||||
|
--conf spark.sql.shuffle.partitions=3000
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--orcidPath</arg>
|
||||||
|
<arg>${orcidPath}</arg>
|
||||||
|
<arg>--targetPath</arg>
|
||||||
|
<arg>${targetPath}</arg>
|
||||||
|
<arg>--graphPath</arg>
|
||||||
|
<arg>${graphPath}</arg>
|
||||||
|
<arg>--master</arg>
|
||||||
|
<arg>yarn</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="End"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="copy_datasource">
|
||||||
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
|
<arg>${nameNode}/${graphPath}/datasource</arg>
|
||||||
|
<arg>${nameNode}/${targetPath}/datasource</arg>
|
||||||
|
</distcp>
|
||||||
|
<ok to="copy_organization"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
<action name="copy_organization">
|
||||||
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
|
<arg>${nameNode}/${graphPath}/organization</arg>
|
||||||
|
<arg>${nameNode}/${targetPath}/organization</arg>
|
||||||
|
</distcp>
|
||||||
|
<ok to="copy_project"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
<action name="copy_project">
|
||||||
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
|
<arg>${nameNode}/${graphPath}/project</arg>
|
||||||
|
<arg>${nameNode}/${targetPath}/project</arg>
|
||||||
|
</distcp>
|
||||||
|
<ok to="copy_relation"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
<action name="copy_relation">
|
||||||
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
|
<arg>${nameNode}/${graphPath}/relation</arg>
|
||||||
|
<arg>${nameNode}/${targetPath}/relation</arg>
|
||||||
|
</distcp>
|
||||||
|
<ok to="End"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
<end name="End"/>
|
||||||
|
|
||||||
|
</workflow-app>
|
|
@ -0,0 +1,40 @@
|
||||||
|
package eu.dnetlib.dhp.enrich.orcid
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.{Author, Publication}
|
||||||
|
import eu.dnetlib.dhp.schema.sx.OafUtils
|
||||||
|
import org.apache.spark.sql.Row
|
||||||
|
|
||||||
|
import scala.collection.JavaConverters._
|
||||||
|
|
||||||
|
/** Helpers that convert ORCID author rows into OAF [[Author]] beans. */
object AuthorEnricher extends Serializable {

  /** Builds an OAF author carrying a single ORCID pid.
    *
    * The pid gets a data info whose provenance action is `ORCID_ENRICHMENT`.
    * The full name is assembled only from the name parts that are present, so a missing
    * given or family name no longer ends up as the literal text "null" in the full name.
    *
    * @param givenName  the author's given name (may be null)
    * @param familyName the author's family name (may be null)
    * @param orcid      the ORCID identifier stored as the author's pid value
    * @return the newly created author
    */
  def createAuthor(givenName: String, familyName: String, orcid: String): Author = {
    val a = new Author
    a.setName(givenName)
    a.setSurname(familyName)
    // String interpolation renders a null reference as "null": keep only the real name parts.
    val fullname = Seq(givenName, familyName)
      .filter(part => part != null && part.trim.nonEmpty)
      .mkString(" ")
    a.setFullname(fullname)
    val pid = OafUtils.createSP(orcid, ModelConstants.ORCID, ModelConstants.ORCID)
    pid.setDataInfo(OafUtils.generateDataInfo())
    pid.getDataInfo.setProvenanceaction(OafUtils.createQualifier("ORCID_ENRICHMENT", "ORCID_ENRICHMENT"))
    a.setPid(List(pid).asJava)
    a
  }

  /** Converts the author structs stored in the second column (index 1) of the given row
    * into OAF authors.
    *
    * Each struct is read through its `givenName`, `familyName` and `orcid` fields.
    *
    * @param r a row whose column 1 is a list of author structs
    * @return one [[Author]] per struct, in the same order
    */
  def toOAFAuthor(r: Row): java.util.List[Author] = {
    r.getList[Row](1)
      .asScala
      .map(s => createAuthor(s.getAs[String]("givenName"), s.getAs[String]("familyName"), s.getAs[String]("orcid")))
      .toList
      .asJava
  }

}
|
|
@ -0,0 +1,138 @@
|
||||||
|
package eu.dnetlib.dhp.enrich.orcid
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.AbstractScalaApplication
|
||||||
|
import eu.dnetlib.dhp.oa.merge.AuthorMerger
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Result, Software}
|
||||||
|
import org.apache.spark.sql.functions._
|
||||||
|
import org.apache.spark.sql._
|
||||||
|
import org.slf4j.{Logger, LoggerFactory}
|
||||||
|
import scala.collection.JavaConverters._
|
||||||
|
|
||||||
|
/** Spark application that enriches the authors of the graph results with the ORCID
  * identifiers found in the ORCID dump.
  *
  * Reads the `graphPath`, `orcidPath` and `targetPath` arguments and writes one enriched
  * copy of each result type (publication, dataset, software, otherresearchproduct)
  * under the target path.
  */
class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String], log: Logger)
    extends AbstractScalaApplication(propertyPath, args, log: Logger) {

  /** Entry point of the Spark node: builds the ORCID lookup table once and applies it
    * to every result type of the graph.
    */
  override def run(): Unit = {
    val graphPath = parser.get("graphPath")
    log.info(s"graphPath is '$graphPath'")
    val orcidPath = parser.get("orcidPath")
    log.info(s"orcidPath is '$orcidPath'")
    val targetPath = parser.get("targetPath")
    log.info(s"targetPath is '$targetPath'")
    val orcidPublication: Dataset[Row] = generateOrcidTable(spark, orcidPath)

    try {
      enrichResult(
        spark,
        s"$graphPath/publication",
        orcidPublication,
        s"$targetPath/publication",
        Encoders.bean(classOf[Publication])
      )
      enrichResult(
        spark,
        s"$graphPath/dataset",
        orcidPublication,
        s"$targetPath/dataset",
        Encoders.bean(classOf[eu.dnetlib.dhp.schema.oaf.Dataset])
      )
      enrichResult(
        spark,
        s"$graphPath/software",
        orcidPublication,
        s"$targetPath/software",
        Encoders.bean(classOf[Software])
      )
      enrichResult(
        spark,
        s"$graphPath/otherresearchproduct",
        orcidPublication,
        s"$targetPath/otherresearchproduct",
        Encoders.bean(classOf[OtherResearchProduct])
      )
    } finally {
      // generateOrcidTable returns a cached dataset: release it once every result type is done.
      orcidPublication.unpersist()
    }
  }

  /** Enriches the authors of one result type and writes the outcome as gzipped JSON.
    *
    * The pids found in the instances of the non-deleted-by-inference entities are matched
    * (case-insensitively, on both schema and value) against the ORCID table; the ORCID
    * authors collected per entity id are then merged into the entity's author list through
    * `AuthorMerger.enrichOrcid`. Entities without any match are written unchanged.
    *
    * @param spark            the active Spark session
    * @param graphPath        path of the JSON dump of the result type to enrich
    * @param orcidPublication ORCID table with the columns `schema`, `value` and `author`
    * @param outputPath       where the enriched entities are written (overwritten if present)
    * @param enc              bean encoder of the result type, also used as the read schema
    */
  private def enrichResult[T <: Result](
    spark: SparkSession,
    graphPath: String,
    orcidPublication: Dataset[Row],
    outputPath: String,
    enc: Encoder[T]
  ): Unit = {

    // (pid_schema, pid_value, dnet_id) for every instance pid of the entities still in use.
    val entities = spark.read
      .schema(enc.schema)
      .json(graphPath)
      .select(col("id"), col("datainfo"), col("instance"))
      .where("datainfo.deletedbyinference != true")
      .drop("datainfo")
      .withColumn("instances", explode(col("instance")))
      .withColumn("pids", explode(col("instances.pid")))
      .select(
        col("pids.qualifier.classid").alias("pid_schema"),
        col("pids.value").alias("pid_value"),
        col("id").alias("dnet_id")
      )

    // One row per entity id with the set of ORCID authors attached to any of its pids.
    val orcidDnet = orcidPublication
      .join(
        entities,
        lower(col("schema")).equalTo(lower(col("pid_schema"))) &&
        lower(col("value")).equalTo(lower(col("pid_value"))),
        "inner"
      )
      .groupBy(col("dnet_id"))
      .agg(collect_set(orcidPublication("author")).alias("orcid_authors"))
      .select("dnet_id", "orcid_authors")
      .cache()
    // count() is an action: it materializes the cached lookup before the join below.
    orcidDnet.count()

    try {
      val result = spark.read.schema(enc.schema).json(graphPath).as[T](enc)

      result
        .joinWith(orcidDnet, result("id").equalTo(orcidDnet("dnet_id")), "left")
        .map {
          // left join: the right side is null when the entity has no ORCID match
          case (entity, null) =>
            entity
          case (entity, orcidRow) =>
            entity.setAuthor(AuthorMerger.enrichOrcid(entity.getAuthor, AuthorEnricher.toOAFAuthor(orcidRow)))
            entity
        }(enc)
        .write
        .mode(SaveMode.Overwrite)
        .option("compression", "gzip")
        .json(outputPath)
    } finally {
      // The lookup is specific to this result type: free it before the next one is processed.
      orcidDnet.unpersist()
    }
  }

  /** Builds the ORCID lookup table by joining the `Authors` and `Works` tables of the dump.
    *
    * Only the work identifiers whose schema is one of doi, pmid, pmc, arxiv or handle are kept.
    *
    * @param spark     the active Spark session
    * @param inputPath base path containing the `Authors` and `Works` tables
    * @return a cached dataset with the columns `schema`, `value` and `author`
    *         (a struct of `orcid`, `givenName`, `familyName`)
    */
  private def generateOrcidTable(spark: SparkSession, inputPath: String): Dataset[Row] = {
    val orcidAuthors =
      spark.read.load(s"$inputPath/Authors").select("orcid", "familyName", "givenName", "creditName", "otherNames")
    val orcidWorks = spark.read
      .load(s"$inputPath/Works")
      .select(col("orcid"), explode(col("pids")).alias("identifier"))
      .where(
        "identifier.schema IN('doi','pmid','pmc','arxiv','handle')"
      )
    val orcidPublication = orcidAuthors
      .join(orcidWorks, orcidAuthors("orcid").equalTo(orcidWorks("orcid")))
      .select(
        col("identifier.schema").alias("schema"),
        col("identifier.value").alias("value"),
        struct(orcidAuthors("orcid").alias("orcid"), col("givenName"), col("familyName")).alias("author")
      )
    orcidPublication.cache()
  }
}
|
||||||
|
|
||||||
|
/** Command-line launcher of the ORCID enrichment Spark application. */
object SparkEnrichGraphWithOrcidAuthors {

  val log: Logger = LoggerFactory.getLogger(SparkEnrichGraphWithOrcidAuthors.getClass)

  /** Creates the application with its parameter definition file, initializes it
    * and runs it.
    *
    * @param args the command-line arguments forwarded to the application
    */
  def main(args: Array[String]): Unit = {
    val application =
      new SparkEnrichGraphWithOrcidAuthors("/eu/dnetlib/dhp/enrich/orcid/enrich_graph_orcid_parameters.json", args, log)
    application.initialize().run()
  }
}
|
|
@ -0,0 +1,77 @@
|
||||||
|
package eu.dnetlib.dhp.enrich.orcid
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.{Author, Publication}
|
||||||
|
import org.apache.spark.sql.{Column, Encoder, Encoders, Row, SparkSession}
|
||||||
|
import org.junit.jupiter.api.Test
|
||||||
|
import org.slf4j.{Logger, LoggerFactory}
|
||||||
|
import org.apache.spark.sql.functions._
|
||||||
|
|
||||||
|
/** Scratch test for the ORCID enrichment.
  *
  * NOTE(review): `test()` carries no `@Test` annotation, so JUnit does not pick it up;
  * the experiments it used to run are kept below as commented-out code.
  */
class EnrichOrcidTest {

  val log: Logger = LoggerFactory.getLogger(getClass)

  /** Opens a local Spark session and derives the Publication bean schema. */
  def test(): Unit = {
    val spark = SparkSession.builder().master("local[*]").getOrCreate()
    //    spark.sparkContext.setLogLevel("ERROR")

    //    new SparkEnrichGraphWithOrcidAuthors(null, null, null)
    //      .enrichResult(
    //        spark,
    //        "/Users/sandro/orcid_test/publication",
    //        "",
    //        "/tmp/graph/",
    //        Encoders.bean(classOf[Publication])
    //      )

    val schema = Encoders.bean(classOf[Publication]).schema
    //
    //    val simplifyAuthor = udf((r: Seq[Row]) => {
    //      r
    //        .map(k =>
    //          AuthorPid(
    //            k.getAs[String]("fullname"),
    //            k.getAs[Seq[Row]]("pid")
    //              .map(p => Pid(p.getAs[Row]("qualifier").getAs[String]("classid"), p.getAs[String]("value")))
    //              .toList
    //          )
    //        )
    //        .filter(l => l.pids.nonEmpty)
    //        .toList
    //    })
    //
    //    val wrong_orcid_intersection = udf((a: Seq[Row]) => {
    //      a.map(author => {
    //        val pids_with_orcid: Seq[Row] = author
    //          .getAs[Seq[Row]]("pids")
    //          .filter(p =>
    //            p.getAs[String]("pidScheme") != null && p.getAs[String]("pidScheme").toLowerCase.contains("orcid")
    //          )
    //        if (pids_with_orcid.exists(p => p.getAs[String]("pidScheme").equals("ORCID"))) {
    //          if (pids_with_orcid.map(p => p.getAs[String]("pidValue").toLowerCase).distinct.size > 1) {
    //            AuthorPid(
    //              author.getAs[String]("fullName"),
    //              pids_with_orcid.map(p => Pid(p.getAs[String]("pidScheme"), p.getAs[String]("pidValue"))).toList
    //            )
    //
    //          } else
    //            null
    //        } else
    //          null
    //      }).filter(author => author != null)
    //    })

    Encoders
    import spark.implicits._

    //    val enriched = spark.read
    //      .schema(schema)
    //      .json("/Users/sandro/orcid_test/publication_enriched")
    //      .select(col("id"), explode(col("author")).as("authors"))
    //      .withColumn("ap", col("authors.pid.qualifier.classid"))
    //      .withColumn("dp", col("authors.pid.datainfo.provenanceAction.classid"))
    //
    //      .show()

  }

}
|
Loading…
Reference in New Issue