Merge pull request 'Master branch updates from beta December 2023' (#369) from beta_to_master_dicember2023 into master

Reviewed-on: D-Net/dnet-hadoop#369
This commit is contained in:
Claudio Atzori 2023-12-15 11:18:30 +01:00
commit c4ec35b6cd
191 changed files with 6725 additions and 1035 deletions

View File

@ -63,7 +63,10 @@ public class Vocabulary implements Serializable {
} }
public VocabularyTerm getTermBySynonym(final String syn) { public VocabularyTerm getTermBySynonym(final String syn) {
return getTerm(synonyms.get(syn.toLowerCase())); return Optional
.ofNullable(syn)
.map(s -> getTerm(synonyms.get(s.toLowerCase())))
.orElse(null);
} }
public Qualifier getTermAsQualifier(final String termId) { public Qualifier getTermAsQualifier(final String termId) {

View File

@ -135,6 +135,24 @@ public class VocabularyGroup implements Serializable {
return vocs.get(vocId.toLowerCase()).getSynonymAsQualifier(syn); return vocs.get(vocId.toLowerCase()).getSynonymAsQualifier(syn);
} }
/**
 * Looks up {@code syn} in the vocabulary identified by {@code vocId}: first through
 * {@code Vocabulary.getTerm}, and only when that yields nothing through
 * {@code Vocabulary.getTermBySynonym}.
 *
 * @param vocId identifier of the vocabulary to search
 * @param syn the value to resolve
 * @return a Qualifier built from the id and name of the matching term and of its vocabulary, or
 *         {@code null} when the vocabulary is not found or no term matches
 */
public Qualifier lookupTermBySynonym(final String vocId, final String syn) {
    return find(vocId)
        .map(
            vocabulary -> Optional
                .ofNullable(vocabulary.getTerm(syn))
                .map(Optional::of)
                // lazy fallback: the synonym lookup runs only when the direct lookup found nothing
                .orElseGet(() -> Optional.ofNullable(vocabulary.getTermBySynonym(syn)))
                .map(
                    term -> OafMapperUtils
                        .qualifier(term.getId(), term.getName(), vocabulary.getId(), vocabulary.getName()))
                .orElse(null))
        .orElse(null);
}
/** /**
* getSynonymAsQualifierCaseSensitive * getSynonymAsQualifierCaseSensitive
* *

View File

@ -119,6 +119,131 @@ public class AuthorMerger {
}); });
} }
/**
 * Produces a comparable form of an author full name: the input goes through {@code nfd}, is lower-cased,
 * then every run of non-word characters, combining diacritical marks, punctuation, digits and newlines
 * is replaced by a single space; finally the result is trimmed.
 *
 * @param fullname the full name to normalize
 * @return the normalized full name
 */
public static String normalizeFullName(final String fullname) {
    // do not compact the regexes in a single expression, would cause StackOverflowError
    // in case of large input strings
    String cleaned = nfd(fullname).toLowerCase();
    cleaned = cleaned.replaceAll("(\\W)+", " ");
    cleaned = cleaned.replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ");
    cleaned = cleaned.replaceAll("(\\p{Punct})+", " ");
    cleaned = cleaned.replaceAll("(\\d)+", " ");
    cleaned = cleaned.replaceAll("(\\n)+", " ");
    return cleaned.trim();
}
/**
 * Picks the author field to use in comparisons: the surname when it is not blank, otherwise the
 * full name when it is not blank.
 *
 * @param author the author to inspect
 * @return the surname, the full name, or {@code null} when both are blank
 */
private static String authorFieldToBeCompared(Author author) {
    final String surname = author.getSurname();
    if (StringUtils.isNotBlank(surname)) {
        return surname;
    }
    final String fullname = author.getFullname();
    return StringUtils.isNotBlank(fullname) ? fullname : null;
}
/**
 * This method tries to figure out when two authors are the same in the context
 * of ORCID enrichment.
 *
 * The heuristic, in order:
 * <ol>
 * <li>when at least one side has no non-blank surname, the authors match only if both have a full name
 * and at least one pair of normalized full names is equal;</li>
 * <li>when the authors share a surname, they match if one side has no usable name or if they share
 * a name;</li>
 * <li>otherwise they match only if name and surname are swapped between the two sides.</li>
 * </ol>
 *
 * @param left Author in the OAF entity
 * @param right Author ORCID
 * @return based on a heuristic on the names of the authors if they are the same.
 */
public static boolean checkORCIDSimilarity(final Author left, final Author right) {
    final Person pl = parse(left);
    final Person pr = parse(right);

    final boolean bothHaveSurname = pl.getSurname() != null
        && pl.getSurname().stream().anyMatch(StringUtils::isNotBlank)
        && pr.getSurname() != null
        && pr.getSurname().stream().anyMatch(StringUtils::isNotBlank);

    // If one of them didn't have a surname we verify if they have the fullName not empty
    // and verify if the normalized version is equal
    if (!bothHaveSurname) {
        if (pl.getFullname() == null || pl.getFullname().isEmpty()
            || pr.getFullname() == null || pr.getFullname().isEmpty()) {
            return false;
        }
        return pl
            .getFullname()
            .stream()
            .anyMatch(
                fl -> pr.getFullname().stream().anyMatch(fr -> normalize(fl).equalsIgnoreCase(normalize(fr))));
    }

    final boolean leftHasName = pl.getName() != null && !pl.getName().stream().allMatch(StringUtils::isBlank);
    final boolean rightHasName = pr.getName() != null && !pr.getName().stream().allMatch(StringUtils::isBlank);

    // The Authors have one surname in common
    if (pl.getSurname().stream().anyMatch(sl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(sl)))) {
        // If one of them has only a surname and is the same we can say that they are the same author
        if (!leftHasName || !rightHasName) {
            return true;
        }
        // The authors have the same initials of Name in common
        if (pl.getName().stream().anyMatch(nl -> pr.getName().stream().anyMatch(nr -> nr.equalsIgnoreCase(nl)))) {
            return true;
        }
    }

    // The inverted-order check below needs a name list on both sides: without this guard a missing
    // name list would raise a NullPointerException instead of yielding "not the same author"
    if (pl.getName() == null || pr.getName() == null) {
        return false;
    }

    // Sometimes we noticed that publication have author wrote in inverse order Surname, Name
    // We verify if we have an exact match between name and surname
    return pl.getSurname().stream().anyMatch(sl -> pr.getName().stream().anyMatch(nr -> nr.equalsIgnoreCase(sl)))
        && pl.getName().stream().anyMatch(nl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(nl)));
}
//
/**
 * Enriches the authors of an OAF entity with the pids of the matching ORCID authors.
 *
 * Each ORCID author is used for at most one base author: once matched (see
 * {@link #checkORCIDSimilarity(Author, Author)}) it is removed from the candidates. The base list is
 * modified in place.
 *
 * @param baseAuthor the Author List in the OAF Entity
 * @param orcidAuthor The list of ORCID Author intersected
 * @return {@code orcidAuthor} when the base list is null or empty; otherwise {@code baseAuthor}, left
 *         untouched when the ORCID list is null or empty or when a single base author faces more than
 *         10 ORCID authors
 */
public static List<Author> enrichOrcid(List<Author> baseAuthor, List<Author> orcidAuthor) {
    if (baseAuthor == null || baseAuthor.isEmpty()) {
        return orcidAuthor;
    }
    if (orcidAuthor == null || orcidAuthor.isEmpty()) {
        return baseAuthor;
    }
    if (baseAuthor.size() == 1 && orcidAuthor.size() > 10) {
        return baseAuthor;
    }

    // working copy, so that matched ORCID authors can be consumed without touching the input list
    final List<Author> candidates = new ArrayList<>(orcidAuthor);

    for (final Author base : baseAuthor) {
        candidates
            .stream()
            .filter(candidate -> checkORCIDSimilarity(base, candidate))
            .findFirst()
            .ifPresent(match -> {
                addPid(base, match.getPid());
                candidates.remove(match);
            });
    }

    return baseAuthor;
}
/**
 * Appends the given pids to the pid list of the author, creating the list when the author has none.
 *
 * @param a the author to update
 * @param pids the pids to append; a {@code null} list is treated as empty
 */
private static void addPid(final Author a, final List<StructuredProperty> pids) {
    if (a.getPid() == null) {
        a.setPid(new ArrayList<>());
    }
    // a matched author may carry no pid list at all: addAll(null) would throw
    if (pids != null) {
        a.getPid().addAll(pids);
    }
}
public static String pidToComparableString(StructuredProperty pid) { public static String pidToComparableString(StructuredProperty pid) {
final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase()
: ""; : "";
@ -171,7 +296,7 @@ public class AuthorMerger {
} }
} }
private static String normalize(final String s) { public static String normalize(final String s) {
String[] normalized = nfd(s) String[] normalized = nfd(s)
.toLowerCase() .toLowerCase()
// do not compact the regexes in a single expression, would cause StackOverflowError // do not compact the regexes in a single expression, would cause StackOverflowError

View File

@ -21,10 +21,15 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.EntityType;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import scala.Tuple2; import scala.Tuple2;
/** /**
@ -35,6 +40,12 @@ public class GroupEntitiesSparkJob {
private static final Encoder<OafEntity> OAFENTITY_KRYO_ENC = Encoders.kryo(OafEntity.class); private static final Encoder<OafEntity> OAFENTITY_KRYO_ENC = Encoders.kryo(OafEntity.class);
private ArgumentApplicationParser parser;
/**
 * Creates the job, keeping a reference to the argument parser it receives.
 *
 * @param parser the argument parser stored in this instance
 */
public GroupEntitiesSparkJob(ArgumentApplicationParser parser) {
this.parser = parser;
}
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils String jsonConfiguration = IOUtils
@ -51,6 +62,17 @@ public class GroupEntitiesSparkJob {
.orElse(Boolean.TRUE); .orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged); log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String isLookupUrl = parser.get("isLookupUrl");
log.info("isLookupUrl: {}", isLookupUrl);
final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl);
new GroupEntitiesSparkJob(parser).run(isSparkSessionManaged, isLookupService);
}
public void run(Boolean isSparkSessionManaged, ISLookUpService isLookUpService)
throws ISLookUpException {
String graphInputPath = parser.get("graphInputPath"); String graphInputPath = parser.get("graphInputPath");
log.info("graphInputPath: {}", graphInputPath); log.info("graphInputPath: {}", graphInputPath);
@ -60,19 +82,21 @@ public class GroupEntitiesSparkJob {
String outputPath = parser.get("outputPath"); String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath); log.info("outputPath: {}", outputPath);
boolean filterInvisible = Boolean.valueOf(parser.get("filterInvisible")); boolean filterInvisible = Boolean.parseBoolean(parser.get("filterInvisible"));
log.info("filterInvisible: {}", filterInvisible); log.info("filterInvisible: {}", filterInvisible);
SparkConf conf = new SparkConf(); SparkConf conf = new SparkConf();
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
conf.registerKryoClasses(ModelSupport.getOafModelClasses()); conf.registerKryoClasses(ModelSupport.getOafModelClasses());
final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookUpService);
runWithSparkSession( runWithSparkSession(
conf, conf,
isSparkSessionManaged, isSparkSessionManaged,
spark -> { spark -> {
HdfsSupport.remove(checkpointPath, spark.sparkContext().hadoopConfiguration()); HdfsSupport.remove(checkpointPath, spark.sparkContext().hadoopConfiguration());
groupEntities(spark, graphInputPath, checkpointPath, outputPath, filterInvisible); groupEntities(spark, graphInputPath, checkpointPath, outputPath, filterInvisible, vocs);
}); });
} }
@ -81,7 +105,7 @@ public class GroupEntitiesSparkJob {
String inputPath, String inputPath,
String checkpointPath, String checkpointPath,
String outputPath, String outputPath,
boolean filterInvisible) { boolean filterInvisible, VocabularyGroup vocs) {
Dataset<OafEntity> allEntities = spark.emptyDataset(OAFENTITY_KRYO_ENC); Dataset<OafEntity> allEntities = spark.emptyDataset(OAFENTITY_KRYO_ENC);
@ -106,10 +130,14 @@ public class GroupEntitiesSparkJob {
} }
Dataset<?> groupedEntities = allEntities Dataset<?> groupedEntities = allEntities
.groupByKey((MapFunction<OafEntity, String>) OafEntity::getId, Encoders.STRING())
.reduceGroups((ReduceFunction<OafEntity>) (b, a) -> OafMapperUtils.mergeEntities(b, a))
.map( .map(
(MapFunction<Tuple2<String, OafEntity>, Tuple2<String, OafEntity>>) t -> new Tuple2( (MapFunction<OafEntity, OafEntity>) entity -> GraphCleaningFunctions
.applyCoarVocabularies(entity, vocs),
OAFENTITY_KRYO_ENC)
.groupByKey((MapFunction<OafEntity, String>) OafEntity::getId, Encoders.STRING())
.reduceGroups((ReduceFunction<OafEntity>) OafMapperUtils::mergeEntities)
.map(
(MapFunction<Tuple2<String, OafEntity>, Tuple2<String, OafEntity>>) t -> new Tuple2<>(
t._2().getClass().getName(), t._2()), t._2().getClass().getName(), t._2()),
Encoders.tuple(Encoders.STRING(), OAFENTITY_KRYO_ENC)); Encoders.tuple(Encoders.STRING(), OAFENTITY_KRYO_ENC));

View File

@ -1,8 +1,12 @@
package eu.dnetlib.dhp.schema.oaf.utils; package eu.dnetlib.dhp.schema.oaf.utils;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import static eu.dnetlib.dhp.schema.common.ModelConstants.OPENAIRE_META_RESOURCE_TYPE;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance;
import java.net.MalformedURLException;
import java.net.URL;
import java.time.LocalDate; import java.time.LocalDate;
import java.time.ZoneId; import java.time.ZoneId;
import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatter;
@ -19,6 +23,7 @@ import com.google.common.collect.Lists;
import com.google.common.collect.Sets; import com.google.common.collect.Sets;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.common.vocabulary.VocabularyTerm;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
@ -26,6 +31,10 @@ import me.xuender.unidecode.Unidecode;
public class GraphCleaningFunctions extends CleaningFunctions { public class GraphCleaningFunctions extends CleaningFunctions {
public static final String DNET_PUBLISHERS = "dnet:publishers";
public static final String DNET_LICENSES = "dnet:licenses";
public static final String ORCID_CLEANING_REGEX = ".*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9x]{4})"; public static final String ORCID_CLEANING_REGEX = ".*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9x]{4})";
public static final int ORCID_LEN = 19; public static final int ORCID_LEN = 19;
public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)"; public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)";
@ -36,6 +45,13 @@ public class GraphCleaningFunctions extends CleaningFunctions {
public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5; public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5;
private static final String NAME_CLEANING_REGEX = "[\\r\\n\\t\\s]+"; private static final String NAME_CLEANING_REGEX = "[\\r\\n\\t\\s]+";
private static final Set<String> INVALID_AUTHOR_NAMES = new HashSet<>();
private static final Set<String> INVALID_URLS = new HashSet<>();
private static final Set<String> INVALID_URL_HOSTS = new HashSet<>();
private static final HashSet<String> PEER_REVIEWED_TYPES = new HashSet<>(); private static final HashSet<String> PEER_REVIEWED_TYPES = new HashSet<>();
static { static {
@ -48,6 +64,47 @@ public class GraphCleaningFunctions extends CleaningFunctions {
PEER_REVIEWED_TYPES.add("Thesis"); PEER_REVIEWED_TYPES.add("Thesis");
PEER_REVIEWED_TYPES.add("Bachelor thesis"); PEER_REVIEWED_TYPES.add("Bachelor thesis");
PEER_REVIEWED_TYPES.add("Conference object"); PEER_REVIEWED_TYPES.add("Conference object");
INVALID_AUTHOR_NAMES.add("(:null)");
INVALID_AUTHOR_NAMES.add("(:unap)");
INVALID_AUTHOR_NAMES.add("(:tba)");
INVALID_AUTHOR_NAMES.add("(:unas)");
INVALID_AUTHOR_NAMES.add("(:unav)");
INVALID_AUTHOR_NAMES.add("(:unkn)");
INVALID_AUTHOR_NAMES.add("(:unkn) unknown");
INVALID_AUTHOR_NAMES.add(":none");
INVALID_AUTHOR_NAMES.add(":null");
INVALID_AUTHOR_NAMES.add(":unas");
INVALID_AUTHOR_NAMES.add(":unav");
INVALID_AUTHOR_NAMES.add(":unkn");
INVALID_AUTHOR_NAMES.add("[autor desconocido]");
INVALID_AUTHOR_NAMES.add("[s. n.]");
INVALID_AUTHOR_NAMES.add("[s.n]");
INVALID_AUTHOR_NAMES.add("[unknown]");
INVALID_AUTHOR_NAMES.add("anonymous");
INVALID_AUTHOR_NAMES.add("n.n.");
INVALID_AUTHOR_NAMES.add("nn");
INVALID_AUTHOR_NAMES.add("no name supplied");
INVALID_AUTHOR_NAMES.add("none");
INVALID_AUTHOR_NAMES.add("none available");
INVALID_AUTHOR_NAMES.add("not available not available");
INVALID_AUTHOR_NAMES.add("null &na;");
INVALID_AUTHOR_NAMES.add("null anonymous");
INVALID_AUTHOR_NAMES.add("unbekannt");
INVALID_AUTHOR_NAMES.add("unknown");
INVALID_URL_HOSTS.add("creativecommons.org");
INVALID_URL_HOSTS.add("www.academia.edu");
INVALID_URL_HOSTS.add("academia.edu");
INVALID_URL_HOSTS.add("researchgate.net");
INVALID_URL_HOSTS.add("www.researchgate.net");
INVALID_URLS.add("http://repo.scoap3.org/api");
INVALID_URLS.add("http://ora.ox.ac.uk/objects/uuid:");
INVALID_URLS.add("http://ntur.lib.ntu.edu.tw/news/agent_contract.pdf");
INVALID_URLS.add("https://media.springer.com/full/springer-instructions-for-authors-assets/pdf/SN_BPF_EN.pdf");
INVALID_URLS.add("http://www.tobaccoinduceddiseases.org/dl/61aad426c96519bea4040a374c6a6110/");
INVALID_URLS.add("https://www.bilboard.nl/verenigingsbladen/bestuurskundige-berichten");
} }
public static <T extends Oaf> T cleanContext(T value, String contextId, String verifyParam) { public static <T extends Oaf> T cleanContext(T value, String contextId, String verifyParam) {
@ -357,6 +414,14 @@ public class GraphCleaningFunctions extends CleaningFunctions {
.getPublisher() .getPublisher()
.getValue() .getValue()
.replaceAll(NAME_CLEANING_REGEX, " ")); .replaceAll(NAME_CLEANING_REGEX, " "));
if (vocs.vocabularyExists(DNET_PUBLISHERS)) {
vocs
.find(DNET_PUBLISHERS)
.map(voc -> voc.getTermBySynonym(r.getPublisher().getValue()))
.map(VocabularyTerm::getName)
.ifPresent(publisher -> r.getPublisher().setValue(publisher));
}
} }
} }
if (Objects.isNull(r.getLanguage()) || StringUtils.isBlank(r.getLanguage().getClassid())) { if (Objects.isNull(r.getLanguage()) || StringUtils.isBlank(r.getLanguage().getClassid())) {
@ -517,6 +582,14 @@ public class GraphCleaningFunctions extends CleaningFunctions {
i.setRefereed(qualifier("0000", "Unknown", ModelConstants.DNET_REVIEW_LEVELS)); i.setRefereed(qualifier("0000", "Unknown", ModelConstants.DNET_REVIEW_LEVELS));
} }
if (Objects.nonNull(i.getLicense()) && Objects.nonNull(i.getLicense().getValue())) {
vocs
.find(DNET_LICENSES)
.map(voc -> voc.getTermBySynonym(i.getLicense().getValue()))
.map(VocabularyTerm::getId)
.ifPresent(license -> i.getLicense().setValue(license));
}
// from the script from Dimitris // from the script from Dimitris
if ("0000".equals(i.getRefereed().getClassid())) { if ("0000".equals(i.getRefereed().getClassid())) {
final boolean isFromCrossref = Optional final boolean isFromCrossref = Optional
@ -558,6 +631,15 @@ public class GraphCleaningFunctions extends CleaningFunctions {
ModelConstants.DATASET_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()))) { ModelConstants.DATASET_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()))) {
i.setFulltext(null); i.setFulltext(null);
} }
if (Objects.nonNull(i.getUrl())) {
i
.setUrl(
i
.getUrl()
.stream()
.filter(GraphCleaningFunctions::urlFilter)
.collect(Collectors.toList()));
}
} }
} }
if (Objects.isNull(r.getBestaccessright()) if (Objects.isNull(r.getBestaccessright())
@ -580,8 +662,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
.getAuthor() .getAuthor()
.stream() .stream()
.filter(Objects::nonNull) .filter(Objects::nonNull)
.filter(a -> StringUtils.isNotBlank(a.getFullname())) .filter(GraphCleaningFunctions::isValidAuthorName)
.filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", "")))
.map(GraphCleaningFunctions::cleanupAuthor) .map(GraphCleaningFunctions::cleanupAuthor)
.collect(Collectors.toList())); .collect(Collectors.toList()));
@ -608,6 +689,9 @@ public class GraphCleaningFunctions extends CleaningFunctions {
.filter(Objects::nonNull) .filter(Objects::nonNull)
.filter(p -> Objects.nonNull(p.getQualifier())) .filter(p -> Objects.nonNull(p.getQualifier()))
.filter(p -> StringUtils.isNotBlank(p.getValue())) .filter(p -> StringUtils.isNotBlank(p.getValue()))
.filter(
p -> StringUtils
.contains(StringUtils.lowerCase(p.getQualifier().getClassid()), ORCID))
.map(p -> { .map(p -> {
// hack to distinguish orcid from orcid_pending // hack to distinguish orcid from orcid_pending
String pidProvenance = getProvenance(p.getDataInfo()); String pidProvenance = getProvenance(p.getDataInfo());
@ -617,7 +701,8 @@ public class GraphCleaningFunctions extends CleaningFunctions {
.toLowerCase() .toLowerCase()
.contains(ModelConstants.ORCID)) { .contains(ModelConstants.ORCID)) {
if (pidProvenance if (pidProvenance
.equals(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY)) { .equals(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY) ||
pidProvenance.equals("ORCID_ENRICHMENT")) {
p.getQualifier().setClassid(ModelConstants.ORCID); p.getQualifier().setClassid(ModelConstants.ORCID);
} else { } else {
p.getQualifier().setClassid(ModelConstants.ORCID_PENDING); p.getQualifier().setClassid(ModelConstants.ORCID_PENDING);
@ -739,14 +824,32 @@ public class GraphCleaningFunctions extends CleaningFunctions {
// HELPERS // HELPERS
private static boolean isValidAuthorName(Author a) { private static boolean isValidAuthorName(Author a) {
return !Stream return StringUtils.isNotBlank(a.getFullname()) &&
StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", "")) &&
!INVALID_AUTHOR_NAMES.contains(StringUtils.lowerCase(a.getFullname()).trim()) &&
!Stream
.of(a.getFullname(), a.getName(), a.getSurname()) .of(a.getFullname(), a.getName(), a.getSurname())
.filter(s -> s != null && !s.isEmpty()) .filter(StringUtils::isNotBlank)
.collect(Collectors.joining("")) .collect(Collectors.joining(""))
.toLowerCase() .toLowerCase()
.matches(INVALID_AUTHOR_REGEX); .matches(INVALID_AUTHOR_REGEX);
} }
/**
 * Tells whether a URL is worth keeping.
 *
 * A URL is rejected when it cannot be parsed, when its path is blank or just "/", when its host is
 * listed in INVALID_URL_HOSTS or when the whole URL is listed in INVALID_URLS.
 *
 * @param u the URL to evaluate
 * @return true when the URL passes all the checks, false otherwise
 */
private static boolean urlFilter(String u) {
    try {
        final URL url = new URL(u);
        final String path = url.getPath();
        if (StringUtils.isBlank(path) || "/".equals(path)) {
            return false;
        }
        // host names are case-insensitive while the blacklisted hosts are registered in lower case
        if (INVALID_URL_HOSTS.contains(StringUtils.lowerCase(url.getHost()))) {
            return false;
        }
        return !INVALID_URLS.contains(url.toString());
    } catch (MalformedURLException ex) {
        // an unparsable URL is not usable, drop it
        return false;
    }
}
private static List<StructuredProperty> processPidCleaning(List<StructuredProperty> pids) { private static List<StructuredProperty> processPidCleaning(List<StructuredProperty> pids) {
return pids return pids
.stream() .stream()
@ -794,4 +897,105 @@ public class GraphCleaningFunctions extends CleaningFunctions {
return s; return s;
} }
/**
 * Resolves the instance types of a Result against the vocabularies and sets its meta resource type.
 * Entities that are not a Result are returned untouched. The entity is modified in place.
 *
 * @param entity the entity to process
 * @param vocs the vocabularies used for the lookups
 * @return the same entity instance received in input
 */
public static OafEntity applyCoarVocabularies(OafEntity entity, VocabularyGroup vocs) {
    if (entity instanceof Result) {
        final Result result = (Result) entity;
        final List<Instance> instances = result.getInstance();
        if (instances != null) {
            for (final Instance instance : instances) {
                // null entries are tolerated by getMetaResourceType, so they are skipped here too
                if (instance != null) {
                    resolveInstanceTypeMapping(instance, vocs);
                }
            }
        }
        result.setMetaResourceType(getMetaResourceType(result.getInstance(), vocs));
    }
    return entity;
}

/**
 * Fills the type mappings of a single instance: resolves the original type in the
 * OPENAIRE_COAR_RESOURCE_TYPES_3_1 vocabulary and, when OPENAIRE_USER_RESOURCE_TYPES is available,
 * adds the mapping found there for the resolved type code.
 */
private static void resolveInstanceTypeMapping(final Instance instance, final VocabularyGroup vocs) {
    if (Objects.isNull(instance.getInstanceTypeMapping())) {
        if (Objects.isNull(instance.getInstancetype())) {
            // neither a mapping nor a declared type: there is nothing to start from
            return;
        }
        final List<InstanceTypeMapping> mapping = Lists.newArrayList();
        mapping
            .add(
                OafMapperUtils
                    .instanceTypeMapping(
                        instance.getInstancetype().getClassname(),
                        OPENAIRE_COAR_RESOURCE_TYPES_3_1));
        instance.setInstanceTypeMapping(mapping);
    }

    // only the first mapping still lacking code and label is resolved
    final Optional<InstanceTypeMapping> optionalItm = instance
        .getInstanceTypeMapping()
        .stream()
        .filter(GraphCleaningFunctions::originalResourceType)
        .findFirst();
    if (!optionalItm.isPresent()) {
        return;
    }

    final InstanceTypeMapping coarItm = optionalItm.get();
    Optional
        .ofNullable(
            vocs.lookupTermBySynonym(OPENAIRE_COAR_RESOURCE_TYPES_3_1, coarItm.getOriginalType()))
        .ifPresent(type -> {
            coarItm.setTypeCode(type.getClassid());
            coarItm.setTypeLabel(type.getClassname());
        });

    if (vocs.vocabularyExists(OPENAIRE_USER_RESOURCE_TYPES)) {
        Optional
            .ofNullable(
                vocs.lookupTermBySynonym(OPENAIRE_USER_RESOURCE_TYPES, coarItm.getTypeCode()))
            .ifPresent(
                type -> instance
                    .getInstanceTypeMapping()
                    .add(OafMapperUtils.instanceTypeMapping(coarItm.getTypeCode(), type)));
    }
}
/**
 * Tells whether a mapping still has to be resolved: it carries a non-blank original type, belongs to
 * the OPENAIRE_COAR_RESOURCE_TYPES_3_1 vocabulary and has neither a type code nor a type label.
 *
 * @param itm the mapping to evaluate
 * @return true when the mapping is an unresolved COAR mapping
 */
private static boolean originalResourceType(InstanceTypeMapping itm) {
    if (StringUtils.isBlank(itm.getOriginalType())) {
        return false;
    }
    if (!OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(itm.getVocabularyName())) {
        return false;
    }
    return StringUtils.isBlank(itm.getTypeCode()) && StringUtils.isBlank(itm.getTypeLabel());
}
/**
 * Derives the meta resource type from the first OPENAIRE_COAR_RESOURCE_TYPES_3_1 mapping found among
 * the instances, looking its type code up in the OPENAIRE_META_RESOURCE_TYPE vocabulary.
 *
 * @param instances the instances to inspect, may be null
 * @param vocs the vocabularies used for the lookup
 * @return the resolved qualifier, or null when the instances are null, no COAR mapping exists or the
 *         first one has no type code
 * @throws IllegalStateException when the OPENAIRE_META_RESOURCE_TYPE vocabulary is not available or
 *         it has no entry for the type code
 */
private static Qualifier getMetaResourceType(final List<Instance> instances, final VocabularyGroup vocs) {
    if (instances == null) {
        return null;
    }
    if (!vocs.vocabularyExists(OPENAIRE_META_RESOURCE_TYPE)) {
        throw new IllegalStateException("vocabulary '" + OPENAIRE_META_RESOURCE_TYPE + "' not available");
    }

    final String typeCode = instances
        .stream()
        .filter(Objects::nonNull)
        .flatMap(
            i -> i.getInstanceTypeMapping() == null
                ? Stream.<InstanceTypeMapping> empty()
                : i.getInstanceTypeMapping().stream())
        .filter(t -> OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(t.getVocabularyName()))
        .findFirst()
        .map(InstanceTypeMapping::getTypeCode)
        .orElse(null);
    if (typeCode == null) {
        return null;
    }

    final Qualifier metaType = vocs.lookupTermBySynonym(OPENAIRE_META_RESOURCE_TYPE, typeCode);
    if (metaType == null) {
        throw new IllegalStateException(
            "unable to find a synonym for '" + typeCode + "' in " +
                OPENAIRE_META_RESOURCE_TYPE);
    }
    return metaType;
}
} }

View File

@ -14,7 +14,6 @@ import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.schema.common.AccessRightComparator; import eu.dnetlib.dhp.schema.common.AccessRightComparator;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
@ -141,6 +140,28 @@ public class OafMapperUtils {
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
/**
 * Builds an InstanceTypeMapping from its four properties.
 *
 * @param originalType the original type
 * @param code the type code
 * @param label the type label
 * @param vocabularyName the name of the vocabulary
 * @return a new InstanceTypeMapping carrying the given values
 */
public static InstanceTypeMapping instanceTypeMapping(String originalType, String code, String label,
    String vocabularyName) {
    final InstanceTypeMapping mapping = new InstanceTypeMapping();
    mapping.setOriginalType(originalType);
    mapping.setTypeCode(code);
    mapping.setTypeLabel(label);
    mapping.setVocabularyName(vocabularyName);
    return mapping;
}

/**
 * Builds an InstanceTypeMapping taking code, label and vocabulary name respectively from the class id,
 * class name and scheme id of the given term.
 *
 * @param originalType the original type
 * @param term the qualifier providing code, label and vocabulary name
 * @return a new InstanceTypeMapping
 */
public static InstanceTypeMapping instanceTypeMapping(String originalType, Qualifier term) {
    final String code = term.getClassid();
    final String label = term.getClassname();
    final String vocabularyName = term.getSchemeid();
    return instanceTypeMapping(originalType, code, label, vocabularyName);
}

/**
 * Builds an InstanceTypeMapping carrying only the original type; code, label and vocabulary name are
 * left null.
 *
 * @param originalType the original type
 * @return a new InstanceTypeMapping
 */
public static InstanceTypeMapping instanceTypeMapping(String originalType) {
    return instanceTypeMapping(originalType, null, null, null);
}

/**
 * Builds an InstanceTypeMapping carrying the original type and the vocabulary name; code and label are
 * left null.
 *
 * @param originalType the original type
 * @param vocabularyName the name of the vocabulary
 * @return a new InstanceTypeMapping
 */
public static InstanceTypeMapping instanceTypeMapping(String originalType, String vocabularyName) {
    return instanceTypeMapping(originalType, null, null, vocabularyName);
}
public static Qualifier unknown(final String schemeid, final String schemename) { public static Qualifier unknown(final String schemeid, final String schemename) {
return qualifier(UNKNOWN, "Unknown", schemeid, schemename); return qualifier(UNKNOWN, "Unknown", schemeid, schemename);
} }

View File

@ -28,5 +28,11 @@
"paramLongName": "filterInvisible", "paramLongName": "filterInvisible",
"paramDescription": "if true filters out invisible entities", "paramDescription": "if true filters out invisible entities",
"paramRequired": true "paramRequired": true
},
{
"paramName": "isu",
"paramLongName": "isLookupUrl",
"paramDescription": "url to the ISLookup Service",
"paramRequired": true
} }
] ]

View File

@ -0,0 +1,114 @@
package eu.dnetlib.oa.merge;
import static org.junit.jupiter.api.Assertions.*;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.List;
import java.util.Objects;
import org.junit.jupiter.api.Test;
import org.junit.platform.commons.util.StringUtils;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.merge.AuthorMerger;
import eu.dnetlib.dhp.schema.oaf.Author;
/**
 * Tests for the ORCID related functions of {@link AuthorMerger}.
 */
public class AuthorMergerTest {

    /**
     * Runs the ORCID enrichment over the sample files and checks that it never reduces the number of
     * authors carrying an ORCID pid.
     *
     * As read by this test, the publication sample alternates an id line and an author-list line,
     * while the ORCID sample provides, for each publication, the matched pid, the ORCID id and the
     * author list on three lines.
     */
    @Test
    public void testEnrcichAuthor() throws Exception {
        final ObjectMapper mapper = new ObjectMapper();
        final TypeReference<List<Author>> aclass = new TypeReference<List<Author>>() {
        };

        // both readers are closed at the end of the test, whatever happens
        try (BufferedReader pr = openResource("/eu/dnetlib/dhp/oa/merge/authors_publication_sample.json");
            BufferedReader or = openResource("/eu/dnetlib/dhp/oa/merge/authors_orcid_sample.json")) {

            String pubLine;
            while ((pubLine = pr.readLine()) != null) {
                final String pubId = pubLine;
                final String MatchPidOrcid = or.readLine();
                final String pubOrcid = or.readLine();

                final String data = pr.readLine();

                if (StringUtils.isNotBlank(data)) {
                    List<Author> publicationAuthors = mapper.readValue(data, aclass);
                    List<Author> orcidAuthors = mapper.readValue(or.readLine(), aclass);
                    System.out.printf("OAF ID = %s \n", pubId);
                    System.out.printf("ORCID Intersected ID = %s \n", pubOrcid);
                    System.out.printf("OAF Author Size = %d \n", publicationAuthors.size());
                    System.out.printf("Oricd Author Size = %d \n", orcidAuthors.size());
                    System.out.printf("Oricd Matched PID = %s \n", MatchPidOrcid);

                    long originalAuthorWithPiD = countAuthorsWithOrcid(publicationAuthors);
                    long start = System.currentTimeMillis();

                    final List<Author> enrichedList = AuthorMerger.enrichOrcid(publicationAuthors, orcidAuthors);

                    long enrichedAuthorWithPid = countAuthorsWithOrcid(enrichedList);

                    long totalTime = (System.currentTimeMillis() - start) / 1000;
                    System.out
                        .printf(
                            "Enriched authors in %d seconds from %d pid to %d pid \n", totalTime,
                            originalAuthorWithPiD,
                            enrichedAuthorWithPid);

                    // the enrichment only adds pids, hence the count cannot decrease
                    assertTrue(enrichedAuthorWithPid >= originalAuthorWithPiD);

                    System.out.println("=================");
                }
            }
        }
    }

    /**
     * Authors written as "Surname, Name" on one side and "Name, Surname" on the other must match.
     */
    @Test
    public void checkSimilarityTest() {
        final Author left = new Author();
        left.setName("Anand");
        left.setSurname("Rachna");

        left.setFullname("Anand, Rachna");

        System.out.println(AuthorMerger.normalizeFullName(left.getFullname()));

        final Author right = new Author();
        right.setName("Rachna");
        right.setSurname("Anand");
        right.setFullname("Rachna, Anand");
        boolean same = AuthorMerger.checkORCIDSimilarity(left, right);

        assertTrue(same);
    }

    /** Opens a classpath resource as a UTF-8 reader, failing fast when the resource is missing. */
    private static BufferedReader openResource(final String path) {
        return new BufferedReader(
            new InputStreamReader(
                Objects.requireNonNull(AuthorMergerTest.class.getResourceAsStream(path), path),
                java.nio.charset.StandardCharsets.UTF_8));
    }

    /** Counts the authors having at least one pid whose qualifier class id contains "orcid". */
    private static long countAuthorsWithOrcid(final List<Author> authors) {
        return authors
            .stream()
            .filter(
                a -> a.getPid() != null && a
                    .getPid()
                    .stream()
                    .anyMatch(
                        p -> p.getQualifier() != null
                            && p.getQualifier().getClassid().toLowerCase().contains("orcid")))
            .count();
    }
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,102 @@
package eu.dnetlib.dhp.collection.orcid;
import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.Objects;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
/**
 * Downloads the files listed by a JSON API into a target path of the given file system.
 *
 * The API response is expected to expose a "files" element whose items carry a "name" and a
 * "download_url".
 */
public class DownloadORCIDDumpApplication {
    private static final Logger log = LoggerFactory.getLogger(DownloadORCIDDumpApplication.class);

    private final FileSystem fileSystem;

    /**
     * @param fileSystem the file system the downloaded files are written to
     */
    public DownloadORCIDDumpApplication(FileSystem fileSystem) {
        this.fileSystem = fileSystem;
    }

    /**
     * Entry point: reads the "namenode", "targetPath" and "apiURL" arguments and starts the download.
     *
     * @param args the command line arguments
     * @throws Exception when the arguments cannot be parsed or the download fails
     */
    public static void main(String[] args) throws Exception {
        final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(
            IOUtils
                .toString(
                    Objects
                        .requireNonNull(
                            DownloadORCIDDumpApplication.class
                                .getResourceAsStream(
                                    "/eu/dnetlib/dhp/collection/orcid/download_orcid_parameter.json"))));
        argumentParser.parseArgument(args);

        final String hdfsuri = argumentParser.get("namenode");
        log.info("hdfsURI is {}", hdfsuri);

        final String targetPath = argumentParser.get("targetPath");
        log.info("targetPath is {}", targetPath);

        final String apiURL = argumentParser.get("apiURL");
        log.info("apiURL is {}", apiURL);

        final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(hdfsuri));

        new DownloadORCIDDumpApplication(fileSystem).run(targetPath, apiURL);
    }

    /**
     * Downloads a single item into {@code basePath/name}, overwriting an existing file.
     *
     * @param name the name of the file to create
     * @param itemURL the URL to download
     * @param basePath the folder the file is created in
     * @throws RuntimeException when the target file cannot be written or the download fails
     */
    private void downloadItem(final String name, final String itemURL, final String basePath) {
        final Path hdfsWritePath = new Path(String.format("%s/%s", basePath, name));
        final HttpGet request = new HttpGet(itemURL);
        final int timeout = 60; // seconds
        final RequestConfig config = RequestConfig
            .custom()
            .setConnectTimeout(timeout * 1000)
            .setConnectionRequestTimeout(timeout * 1000)
            .setSocketTimeout(timeout * 1000)
            .build();
        log.info("Downloading url {} into {}", itemURL, hdfsWritePath.getName());

        // the output stream must be closed as well, otherwise it stays open after the copy
        try (FSDataOutputStream fsDataOutputStream = fileSystem.create(hdfsWritePath, true);
            CloseableHttpClient client = HttpClientBuilder.create().setDefaultRequestConfig(config).build();
            CloseableHttpResponse response = client.execute(request)) {
            int responseCode = response.getStatusLine().getStatusCode();
            log.info("Response code is {}", responseCode);
            if (responseCode >= 200 && responseCode < 400) {
                IOUtils.copy(response.getEntity().getContent(), fsDataOutputStream);
            } else {
                // best effort: the other items are still downloaded, but the failure is made visible
                log.warn("Skipping the content of {}: unexpected response code {}", itemURL, responseCode);
            }
        } catch (Exception e) {
            throw new RuntimeException(
                String.format("Error downloading %s into %s", itemURL, hdfsWritePath), e);
        }
    }

    /**
     * Reads the list of files from the API and downloads each of them into the target path.
     *
     * @param targetPath the folder the files are downloaded into
     * @param apiURL the URL of the API listing the files
     * @throws Exception when the API cannot be read or its response has no "files" element
     */
    protected void run(final String targetPath, final String apiURL) throws Exception {
        final ObjectMapper mapper = new ObjectMapper();
        final URL url = new URL(apiURL);
        final URLConnection conn = url.openConnection();

        final JsonNode jsonNode;
        // parsing straight from the stream lets the parser detect the JSON encoding
        try (InputStream is = conn.getInputStream()) {
            jsonNode = mapper.readTree(is);
        }

        final JsonNode files = jsonNode == null ? null : jsonNode.get("files");
        if (files == null) {
            throw new IllegalStateException("no 'files' element in the response of " + apiURL);
        }

        files
            .forEach(i -> downloadItem(i.get("name").asText(), i.get("download_url").asText(), targetPath));
    }
}

View File

@ -0,0 +1,71 @@
package eu.dnetlib.dhp.collection.orcid;
import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
/**
 * Extracts the {@code tar.gz} files of an ORCID dump found in a source folder, running one
 * {@link ORCIDExtractor} thread per archive.
 */
public class ExtractORCIDDump {

    private static final Logger log = LoggerFactory.getLogger(ExtractORCIDDump.class);

    private final FileSystem fileSystem;

    public ExtractORCIDDump(FileSystem fileSystem) {
        this.fileSystem = fileSystem;
    }

    /**
     * Command line entry point: parses the {@code namenode}, {@code sourcePath} and {@code targetPath}
     * arguments and starts the extraction.
     *
     * @param args the command line arguments
     * @throws Exception if the arguments cannot be parsed or the extraction fails
     */
    public static void main(String[] args) throws Exception {
        final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(
            IOUtils
                .toString(
                    Objects
                        .requireNonNull(
                            ExtractORCIDDump.class
                                .getResourceAsStream(
                                    "/eu/dnetlib/dhp/collection/orcid/extract_orcid_parameter.json"))));
        argumentParser.parseArgument(args);

        final String hdfsuri = argumentParser.get("namenode");
        log.info("hdfsURI is {}", hdfsuri);

        final String sourcePath = argumentParser.get("sourcePath");
        log.info("sourcePath is {}", sourcePath);

        final String targetPath = argumentParser.get("targetPath");
        log.info("targetPath is {}", targetPath);

        final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(hdfsuri));

        new ExtractORCIDDump(fileSystem).run(sourcePath, targetPath);
    }

    /**
     * Starts one extractor thread for each file of {@code sourcePath} (not recursive) whose name ends with
     * {@code tar.gz} and waits for all of them to terminate.
     *
     * @param sourcePath the folder containing the archives
     * @param targetPath the folder where the extractors write their output
     * @throws IOException          if the source folder cannot be listed or at least one extractor thread
     *                              terminated with an uncaught exception
     * @throws InterruptedException if the current thread is interrupted while waiting for the extractors
     */
    public void run(final String sourcePath, final String targetPath) throws IOException, InterruptedException {
        final RemoteIterator<LocatedFileStatus> ls = fileSystem.listFiles(new Path(sourcePath), false);
        final List<ORCIDExtractor> workers = new ArrayList<>();
        int i = 0;
        while (ls.hasNext()) {
            final LocatedFileStatus current = ls.next();
            if (current.getPath().getName().endsWith("tar.gz")) {
                workers.add(new ORCIDExtractor(fileSystem, "" + i++, current.getPath(), targetPath));
            }
        }

        // An exception thrown inside a worker only terminates that thread: collect the failures so that
        // a broken extraction is reported to the caller instead of being silently ignored.
        final List<Throwable> failures = java.util.Collections.synchronizedList(new ArrayList<>());
        workers.forEach(worker -> worker.setUncaughtExceptionHandler((thread, error) -> {
            log.error("Extraction thread {} failed", thread.getName(), error);
            failures.add(error);
        }));

        workers.forEach(Thread::start);
        for (ORCIDExtractor worker : workers) {
            worker.join();
        }

        if (!failures.isEmpty()) {
            throw new IOException(
                String.format("%d ORCID extraction thread(s) failed, see the log for details", failures.size()),
                failures.get(0));
        }
    }
}

View File

@ -0,0 +1,171 @@
package eu.dnetlib.dhp.collection.orcid;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**\
* The ORCIDExtractor class extracts ORCID data from a TAR archive.
* The class creates a map of SequenceFile.Writer objects, one for each type of data that is to be extracted (e.g., employments, works, summaries).
* Then, it iterates over the TAR archive and writes each entry to the appropriate SequenceFile.Writer object.
* Finally, it closes all the SequenceFile.Writer objects.
*/
public class ORCIDExtractor extends Thread {
private static final Logger log = LoggerFactory.getLogger(ORCIDExtractor.class);
private final FileSystem fileSystem;
private final String id;
private final Path sourcePath;
private final String baseOutputPath;
public ORCIDExtractor(FileSystem fileSystem, String id, Path sourcePath, String baseOutputPath) {
this.fileSystem = fileSystem;
this.id = id;
this.sourcePath = sourcePath;
this.baseOutputPath = baseOutputPath;
}
/**
* creates a map of SequenceFile.Writer objects,
* one for each type of data that is to be extracted. The map is created based on the filename in the TAR archive.
* For example, if the filename is employments.json, the map will contain an entry for the SequenceFile.Writer
* object that writes employment data.
* @return the Map
*/
private Map<String, SequenceFile.Writer> createMap() {
try {
log.info("Thread {} Creating sequence files starting from this input Path {}", id, sourcePath.getName());
Map<String, SequenceFile.Writer> res = new HashMap<>();
if (sourcePath.getName().contains("summaries")) {
final String summaryPath = String.format("%s/summaries_%s", baseOutputPath, id);
final SequenceFile.Writer summary_file = SequenceFile
.createWriter(
fileSystem.getConf(),
SequenceFile.Writer.file(new Path(summaryPath)),
SequenceFile.Writer.keyClass(Text.class),
SequenceFile.Writer.valueClass(Text.class));
log.info("Thread {} Creating only summary path here {}", id, summaryPath);
res.put("summary", summary_file);
return res;
} else {
String employmentsPath = String.format("%s/employments_%s", baseOutputPath, id);
final SequenceFile.Writer employments_file = SequenceFile
.createWriter(
fileSystem.getConf(),
SequenceFile.Writer.file(new Path(employmentsPath)),
SequenceFile.Writer.keyClass(Text.class),
SequenceFile.Writer.valueClass(Text.class));
res.put("employments", employments_file);
log.info("Thread {} Creating employments path here {}", id, employmentsPath);
final String worksPath = String.format("%s/works_%s", baseOutputPath, id);
final SequenceFile.Writer works_file = SequenceFile
.createWriter(
fileSystem.getConf(),
SequenceFile.Writer.file(new Path(worksPath)),
SequenceFile.Writer.keyClass(Text.class),
SequenceFile.Writer.valueClass(Text.class));
res.put("works", works_file);
log.info("Thread {} Creating works path here {}", id, worksPath);
return res;
}
} catch (Throwable e) {
throw new RuntimeException(e);
}
}
@Override
public void run() {
CompressionCodecFactory factory = new CompressionCodecFactory(fileSystem.getConf());
CompressionCodec codec = factory.getCodec(sourcePath);
if (codec == null) {
System.err.println("No codec found for " + sourcePath.getName());
System.exit(1);
}
InputStream gzipInputStream = null;
try {
gzipInputStream = codec.createInputStream(fileSystem.open(sourcePath));
final Map<String, SequenceFile.Writer> fileMap = createMap();
iterateTar(fileMap, gzipInputStream);
} catch (IOException e) {
throw new RuntimeException(e);
} finally {
log.info("Closing gzip stream");
IOUtils.closeStream(gzipInputStream);
}
}
private SequenceFile.Writer retrieveFile(Map<String, SequenceFile.Writer> fileMap, final String path) {
if (sourcePath.getName().contains("summaries")) {
return fileMap.get("summary");
}
if (path.contains("works")) {
return fileMap.get("works");
}
if (path.contains("employments"))
return fileMap.get("employments");
return null;
}
private void iterateTar(Map<String, SequenceFile.Writer> fileMap, InputStream gzipInputStream) throws IOException {
int extractedItem = 0;
try (final TarArchiveInputStream tais = new TarArchiveInputStream(gzipInputStream)) {
TarArchiveEntry entry;
while ((entry = tais.getNextTarEntry()) != null) {
if (entry.isFile()) {
final SequenceFile.Writer fl = retrieveFile(fileMap, entry.getName());
if (fl != null) {
final Text key = new Text(entry.getName());
final Text value = new Text(
org.apache.commons.io.IOUtils.toString(new BufferedReader(new InputStreamReader(tais))));
fl.append(key, value);
extractedItem++;
if (extractedItem % 100000 == 0) {
log.info("Thread {}: Extracted {} items", id, extractedItem);
break;
}
}
}
}
} finally {
for (SequenceFile.Writer k : fileMap.values()) {
log.info("Thread {}: Completed processed {} items", id, extractedItem);
k.hflush();
k.close();
}
}
}
}

View File

@ -0,0 +1,251 @@
package eu.dnetlib.dhp.collection.orcid;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.ximpleware.*;
import eu.dnetlib.dhp.collection.orcid.model.*;
import eu.dnetlib.dhp.parser.utility.VtdException;
import eu.dnetlib.dhp.parser.utility.VtdUtilityParser;
/**
 * Parses the XML records of the ORCID dump (summaries, works and employments) into the corresponding
 * model beans, by means of XPath expressions evaluated with VTD-XML.
 * <p>
 * Instances are not thread-safe: the navigator and the auto pilot of the document being parsed are kept in
 * instance fields and replaced at every {@code parse*} invocation.
 */
public class OrcidParser {

    private static final Logger log = LoggerFactory.getLogger(OrcidParser.class);

    private VTDNav vn;

    private AutoPilot ap;

    private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common";
    private static final String NS_COMMON = "common";
    private static final String NS_PERSON_URL = "http://www.orcid.org/ns/person";
    private static final String NS_PERSON = "person";
    private static final String NS_DETAILS_URL = "http://www.orcid.org/ns/personal-details";
    private static final String NS_DETAILS = "personal-details";
    private static final String NS_OTHER_URL = "http://www.orcid.org/ns/other-name";
    private static final String NS_OTHER = "other-name";
    private static final String NS_RECORD_URL = "http://www.orcid.org/ns/record";
    private static final String NS_RECORD = "record";
    private static final String NS_ERROR_URL = "http://www.orcid.org/ns/error";

    private static final String NS_ACTIVITIES = "activities";
    private static final String NS_ACTIVITIES_URL = "http://www.orcid.org/ns/activities";
    private static final String NS_WORK = "work";
    private static final String NS_WORK_URL = "http://www.orcid.org/ns/work";

    private static final String NS_ERROR = "error";
    private static final String NS_HISTORY = "history";
    private static final String NS_HISTORY_URL = "http://www.orcid.org/ns/history";
    private static final String NS_BULK_URL = "http://www.orcid.org/ns/bulk";
    private static final String NS_BULK = "bulk";
    private static final String NS_EXTERNAL = "external-identifier";
    private static final String NS_EXTERNAL_URL = "http://www.orcid.org/ns/external-identifier";

    /**
     * Parses the XML (namespace aware) and binds a new navigator and auto pilot, with the ORCID
     * namespaces declared, to this parser.
     *
     * @param xml the record to parse
     * @throws ParseException if the XML is not well formed
     */
    private void generateParsedDocument(final String xml) throws ParseException {
        final VTDGen vg = new VTDGen();
        // NOTE(review): the bytes are produced with the platform default charset — confirm it is
        // consistent with the encoding declared by the records
        vg.setDoc(xml.getBytes());
        vg.parse(true);
        this.vn = vg.getNav();
        this.ap = new AutoPilot(vn);
        ap.declareXPathNameSpace(NS_COMMON, NS_COMMON_URL);
        ap.declareXPathNameSpace(NS_PERSON, NS_PERSON_URL);
        ap.declareXPathNameSpace(NS_DETAILS, NS_DETAILS_URL);
        ap.declareXPathNameSpace(NS_OTHER, NS_OTHER_URL);
        ap.declareXPathNameSpace(NS_RECORD, NS_RECORD_URL);
        ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL);
        ap.declareXPathNameSpace(NS_HISTORY, NS_HISTORY_URL);
        ap.declareXPathNameSpace(NS_WORK, NS_WORK_URL);
        ap.declareXPathNameSpace(NS_EXTERNAL, NS_EXTERNAL_URL);
        ap.declareXPathNameSpace(NS_ACTIVITIES, NS_ACTIVITIES_URL);
    }

    /**
     * Reads the {@code common:external-id-type} and {@code common:external-id-value} children of the
     * element currently selected by the main auto pilot.
     *
     * @return the Pid holding the schema (type) and the value found
     * @throws Exception if the evaluation of the XPath expressions fails
     */
    private Pid extractExternalId() throws Exception {
        final Pid pid = new Pid();
        final AutoPilot ap1 = new AutoPilot(ap.getNav());

        ap1.selectXPath("./common:external-id-type");
        while (ap1.evalXPath() != -1) {
            int it = vn.getText();
            pid.setSchema(vn.toNormalizedString(it));
        }
        ap1.selectXPath("./common:external-id-value");
        while (ap1.evalXPath() != -1) {
            int it = vn.getText();
            pid.setValue(vn.toNormalizedString(it));
        }
        return pid;
    }

    /**
     * Logs a record that cannot be parsed, together with the cause when available.
     */
    private void logParsingError(final String xml, final Exception cause) {
        // the throwable is passed as last argument so that the stack trace is not lost
        log.error("Error on parsing {}", xml, cause);
    }

    /**
     * Parses an ORCID summary record.
     *
     * @param xml the summary record
     * @return the Author, or null when the record has no {@code record:record} or {@code person:name}
     *         element or cannot be parsed
     */
    public Author parseSummary(final String xml) {

        try {
            final Author author = new Author();
            generateParsedDocument(xml);
            List<VtdUtilityParser.Node> recordNodes = VtdUtilityParser
                .getTextValuesWithAttributes(
                    ap, vn, "//record:record", Arrays.asList("path"));
            if (!recordNodes.isEmpty()) {
                final String oid = (recordNodes.get(0).getAttributes().get("path")).substring(1);
                author.setOrcid(oid);
            } else {
                return null;
            }
            List<VtdUtilityParser.Node> personNodes = VtdUtilityParser
                .getTextValuesWithAttributes(
                    ap, vn, "//person:name", Arrays.asList("visibility"));
            if (personNodes.isEmpty()) {
                // NOTE(review): such records were already discarded, through an IndexOutOfBoundsException
                // caught below; the outcome is unchanged — confirm that dropping them is intended
                log.error("Error on parsing {}", xml);
                log.error("Missing person:name element");
                return null;
            }
            final String visibility = (personNodes.get(0).getAttributes().get("visibility"));
            author.setVisibility(visibility);
            final String name = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:given-names");
            author.setGivenName(name);

            final String surnames = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:family-name");
            author.setFamilyName(surnames);

            final String creditNames = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:credit-name");
            author.setCreditName(creditNames);

            final String biography = VtdUtilityParser
                .getSingleValue(ap, vn, "//person:biography/personal-details:content");
            author.setBiography(biography);

            final List<String> otherNames = VtdUtilityParser.getTextValue(ap, vn, "//other-name:content");
            if (!otherNames.isEmpty()) {
                author.setOtherNames(otherNames);
            }

            ap.selectXPath("//external-identifier:external-identifier");
            while (ap.evalXPath() != -1) {
                author.addOtherPid(extractExternalId());
            }

            return author;
        } catch (Exception e) {
            // JVM errors (e.g. OutOfMemoryError) are deliberately not swallowed
            logParsingError(xml, e);
            return null;
        }
    }

    /**
     * Parses an ORCID work record.
     *
     * @param xml the work record
     * @return the Work, or null when the record has no {@code work:work} element or cannot be parsed
     */
    public Work parseWork(final String xml) {

        try {
            final Work work = new Work();
            generateParsedDocument(xml);
            List<VtdUtilityParser.Node> workNodes = VtdUtilityParser
                .getTextValuesWithAttributes(ap, vn, "//work:work", Arrays.asList("path", "visibility"));
            if (!workNodes.isEmpty()) {
                // the ORCID identifier is the first segment of the path attribute
                final String oid = (workNodes.get(0).getAttributes().get("path")).split("/")[1];
                work.setOrcid(oid);
            } else {
                return null;
            }

            ap.selectXPath("//common:external-id");
            while (ap.evalXPath() != -1) {
                work.addPid(extractExternalId());
            }

            work.setTitle(VtdUtilityParser.getSingleValue(ap, vn, "//work:title/common:title"));

            return work;
        } catch (Exception e) {
            // JVM errors (e.g. OutOfMemoryError) are deliberately not swallowed
            logParsingError(xml, e);
            return null;
        }
    }

    /**
     * Builds a date string from the {@code common:year}, {@code common:month} and {@code common:day}
     * children of the elements selected by the XPath; month and day are prefixed by "-".
     *
     * @param xpath the XPath selecting the date element
     * @return the date, or an empty string when nothing is found
     * @throws Exception if the evaluation of the XPath expressions fails
     */
    private String extractEmploymentDate(final String xpath) throws Exception {

        ap.selectXPath(xpath);
        StringBuilder sb = new StringBuilder();
        while (ap.evalXPath() != -1) {
            final AutoPilot ap1 = new AutoPilot(ap.getNav());
            ap1.selectXPath("./common:year");
            while (ap1.evalXPath() != -1) {
                int it = vn.getText();
                sb.append(vn.toNormalizedString(it));
            }
            ap1.selectXPath("./common:month");
            while (ap1.evalXPath() != -1) {
                int it = vn.getText();
                sb.append("-");
                sb.append(vn.toNormalizedString(it));
            }
            ap1.selectXPath("./common:day");
            while (ap1.evalXPath() != -1) {
                int it = vn.getText();
                sb.append("-");
                sb.append(vn.toNormalizedString(it));
            }
        }
        return sb.toString();
    }

    /**
     * Parses an ORCID employment record. The department name falls back to the organization name when
     * the record has no department.
     *
     * @param xml the employment record
     * @return the Employment, or null when the record has no source ORCID path or cannot be parsed
     */
    public Employment parseEmployment(final String xml) {
        try {
            final Employment employment = new Employment();
            generateParsedDocument(xml);
            final String oid = VtdUtilityParser
                .getSingleValue(ap, vn, "//common:source-orcid/common:path");
            if (StringUtils.isNotBlank(oid)) {
                employment.setOrcid(oid);
            } else {
                return null;
            }
            final String depName = VtdUtilityParser
                .getSingleValue(ap, vn, "//common:department-name");
            final String rolTitle = VtdUtilityParser
                .getSingleValue(ap, vn, "//common:role-title");
            if (StringUtils.isNotBlank(rolTitle)) {
                employment.setRoleTitle(rolTitle);
            }
            if (StringUtils.isNotBlank(depName)) {
                employment.setDepartmentName(depName);
            } else {
                employment
                    .setDepartmentName(
                        VtdUtilityParser
                            .getSingleValue(ap, vn, "//common:organization/common:name"));
            }

            employment.setStartDate(extractEmploymentDate("//common:start-date"));
            employment.setEndDate(extractEmploymentDate("//common:end-date"));

            final String affiliationId = VtdUtilityParser
                .getSingleValue(ap, vn, "//common:disambiguated-organization-identifier");
            final String affiliationIdType = VtdUtilityParser
                .getSingleValue(ap, vn, "//common:disambiguation-source");

            if (StringUtils.isNotBlank(affiliationId) || StringUtils.isNotBlank(affiliationIdType)) {
                employment.setAffiliationId(new Pid(affiliationId, affiliationIdType));
            }

            return employment;
        } catch (Exception e) {
            // JVM errors (e.g. OutOfMemoryError) are deliberately not swallowed
            logParsingError(xml, e);
            return null;
        }
    }
}

View File

@ -0,0 +1,83 @@
package eu.dnetlib.dhp.collection.orcid.model;
import java.util.ArrayList;
import java.util.List;
/**
 * Bean describing an ORCID author: names, visibility, biography, alternative names and
 * the other persistent identifiers attached to the profile.
 */
public class Author extends ORCIDItem {

    private String givenName;

    private String familyName;

    private String creditName;

    private String visibility;

    private String biography;

    private List<String> otherNames;

    /** Created lazily by {@link #addOtherPid(Pid)}; null until then unless explicitly set. */
    private List<Pid> otherPids;

    public String getGivenName() {
        return this.givenName;
    }

    public void setGivenName(final String givenName) {
        this.givenName = givenName;
    }

    public String getFamilyName() {
        return this.familyName;
    }

    public void setFamilyName(final String familyName) {
        this.familyName = familyName;
    }

    public String getCreditName() {
        return this.creditName;
    }

    public void setCreditName(final String creditName) {
        this.creditName = creditName;
    }

    public String getVisibility() {
        return this.visibility;
    }

    public void setVisibility(final String visibility) {
        this.visibility = visibility;
    }

    public String getBiography() {
        return this.biography;
    }

    public void setBiography(final String biography) {
        this.biography = biography;
    }

    public List<String> getOtherNames() {
        return this.otherNames;
    }

    public void setOtherNames(final List<String> otherNames) {
        this.otherNames = otherNames;
    }

    public List<Pid> getOtherPids() {
        return this.otherPids;
    }

    public void setOtherPids(final List<Pid> otherPids) {
        this.otherPids = otherPids;
    }

    /**
     * Appends a persistent identifier, creating the backing list on first use.
     *
     * @param pid the identifier to add
     */
    public void addOtherPid(final Pid pid) {
        if (this.otherPids == null) {
            this.otherPids = new ArrayList<>();
        }
        this.otherPids.add(pid);
    }
}

View File

@ -0,0 +1,54 @@
package eu.dnetlib.dhp.collection.orcid.model;
/**
 * Bean describing an ORCID employment: the period, the role and the department of the employment,
 * plus the identifier of the affiliation.
 */
public class Employment extends ORCIDItem {

    private String startDate;

    // renamed from "EndDate": fields are lowerCamelCase like the others; the bean property
    // exposed by getEndDate/setEndDate is unchanged
    private String endDate;

    private Pid affiliationId;

    private String departmentName;

    private String roleTitle;

    public String getStartDate() {
        return startDate;
    }

    public void setStartDate(String startDate) {
        this.startDate = startDate;
    }

    public String getEndDate() {
        return endDate;
    }

    public void setEndDate(String endDate) {
        this.endDate = endDate;
    }

    public Pid getAffiliationId() {
        return affiliationId;
    }

    public void setAffiliationId(Pid affiliationId) {
        this.affiliationId = affiliationId;
    }

    public String getDepartmentName() {
        return departmentName;
    }

    public void setDepartmentName(String departmentName) {
        this.departmentName = departmentName;
    }

    public String getRoleTitle() {
        return roleTitle;
    }

    public void setRoleTitle(String roleTitle) {
        this.roleTitle = roleTitle;
    }
}

View File

@ -0,0 +1,14 @@
package eu.dnetlib.dhp.collection.orcid.model;
/**
 * Base bean holding the ORCID identifier an item refers to.
 */
public class ORCIDItem {

    /** The ORCID identifier; null until set. */
    private String orcid;

    public void setOrcid(final String orcid) {
        this.orcid = orcid;
    }

    public String getOrcid() {
        return this.orcid;
    }
}

View File

@ -0,0 +1,33 @@
package eu.dnetlib.dhp.collection.orcid.model;
/**
 * Bean holding a persistent identifier as a (value, schema) pair.
 */
public class Pid {

    private String schema;

    private String value;

    /** Creates an empty identifier: both value and schema are null. */
    public Pid() {
    }

    /**
     * Creates an identifier.
     *
     * @param value  the identifier value
     * @param schema the schema (type) of the identifier
     */
    public Pid(final String value, final String schema) {
        this.schema = schema;
        this.value = value;
    }

    public String getSchema() {
        return this.schema;
    }

    public void setSchema(final String schema) {
        this.schema = schema;
    }

    public String getValue() {
        return this.value;
    }

    public void setValue(final String value) {
        this.value = value;
    }
}

View File

@ -0,0 +1,35 @@
package eu.dnetlib.dhp.collection.orcid.model;
import java.util.ArrayList;
import java.util.List;
/**
 * Bean describing an ORCID work: its title and the persistent identifiers attached to it.
 */
public class Work extends ORCIDItem {

    private String title;

    /** Created lazily by {@link #addPid(Pid)}; null until then unless explicitly set. */
    private List<Pid> pids;

    public String getTitle() {
        return this.title;
    }

    public void setTitle(final String title) {
        this.title = title;
    }

    public List<Pid> getPids() {
        return this.pids;
    }

    public void setPids(final List<Pid> pids) {
        this.pids = pids;
    }

    /**
     * Appends a persistent identifier, creating the backing list on first use.
     *
     * @param pid the identifier to add
     */
    public void addPid(final Pid pid) {
        if (this.pids == null) {
            this.pids = new ArrayList<>();
        }
        this.pids.add(pid);
    }
}

View File

@ -0,0 +1,21 @@
[
{
"paramName": "n",
"paramLongName": "namenode",
"paramDescription": "the Name Node URI",
"paramRequired": true
},
{
"paramName": "t",
"paramLongName": "targetPath",
"paramDescription": "the target PATH where the files are downloaded",
"paramRequired": true
},
{
"paramName": "a",
"paramLongName": "apiURL",
"paramDescription": "the FIGSHARE API id URL to retrieve all the dump files",
"paramRequired": true
}
]

View File

@ -0,0 +1,21 @@
[
{
"paramName": "n",
"paramLongName": "namenode",
"paramDescription": "the Name Node URI",
"paramRequired": true
},
{
"paramName": "t",
"paramLongName": "targetPath",
"paramDescription": "the target PATH to extract files",
"paramRequired": true
},
{
"paramName": "s",
"paramLongName": "sourcePath",
"paramDescription": "the PATH where the tar.gz files were downloaded",
"paramRequired": true
}
]

View File

@ -0,0 +1,21 @@
[
{
"paramName": "m",
"paramLongName": "master",
"paramDescription": "the master name",
"paramRequired": true
},
{
"paramName": "t",
"paramLongName": "targetPath",
"paramDescription": "the target PATH of the DF tables",
"paramRequired": true
},
{
"paramName": "s",
"paramLongName": "sourcePath",
"paramDescription": "the PATH of the ORCID sequence file",
"paramRequired": true
}
]

View File

@ -0,0 +1,23 @@
<!-- Oozie job properties: cluster endpoints plus library/classpath settings. -->
<configuration>
<!-- Cluster endpoints referenced by the workflow actions. -->
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<!-- Library settings: system libpath enabled, spark actions bound to the "spark2" sharelib. -->
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
<!-- NOTE(review): presumably gives the user jars precedence on the launcher classpath; verify against the Oozie documentation. -->
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

View File

@ -0,0 +1,81 @@
<!--
    Downloads the ORCID dump, extracts it into sequence files and generates the ORCID tables.
    Action chain: DownloadDUMP -> extractDump -> generateTables -> End; any failure goes to Kill.
-->
<workflow-app name="download_ORCID_DUMP" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>targetPath</name>
            <description>the path to store the original ORCID dump</description>
        </property>
        <property>
            <name>apiURL</name>
            <description>The figshare API URL to retrieve the list file to download</description>
        </property>
    </parameters>

    <!-- The workflow must start from the download: starting from generateTables made the
         DownloadDUMP and extractDump actions unreachable and the apiURL parameter unused. -->
    <start to="DownloadDUMP"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <!-- Downloads the files listed by the API into ${targetPath}. -->
    <action name="DownloadDUMP">
        <java>
            <configuration>
                <property>
                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
                    <value>true</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.dhp.collection.orcid.DownloadORCIDDumpApplication</main-class>
            <arg>--namenode</arg><arg>${nameNode}</arg>
            <arg>--targetPath</arg><arg>${targetPath}</arg>
            <arg>--apiURL</arg><arg>${apiURL}</arg>
        </java>
        <ok to="extractDump"/>
        <error to="Kill"/>
    </action>

    <!-- Extracts the downloaded archives from ${targetPath} into ${targetPath}/extracted. -->
    <action name="extractDump">
        <java>
            <configuration>
                <property>
                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
                    <value>true</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.dhp.collection.orcid.ExtractORCIDDump</main-class>
            <java-opts> -Xmx6g </java-opts>
            <arg>--namenode</arg><arg>${nameNode}</arg>
            <arg>--sourcePath</arg><arg>${targetPath}</arg>
            <arg>--targetPath</arg><arg>${targetPath}/extracted</arg>
        </java>
        <ok to="generateTables"/>
        <error to="Kill"/>
    </action>

    <!-- Generates the tables under ${targetPath}/tables from the extracted sequence files. -->
    <action name="generateTables">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Generate ORCID Tables</name>
            <class>eu.dnetlib.dhp.collection.orcid.SparkGenerateORCIDTable</class>
            <jar>dhp-aggregation-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.executor.memoryOverhead=2g
                --conf spark.sql.shuffle.partitions=3000
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${targetPath}/extracted</arg>
            <arg>--targetPath</arg><arg>${targetPath}/tables</arg>
            <arg>--master</arg><arg>yarn</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

    <end name="End"/>
</workflow-app>

View File

@ -0,0 +1,21 @@
[
{
"paramName": "n",
"paramLongName": "namenode",
"paramDescription": "the Name Node URI",
"paramRequired": true
},
{
"paramName": "t",
"paramLongName": "targetPath",
"paramDescription": "the target PATH where the files are downloaded",
"paramRequired": true
},
{
"paramName": "a",
"paramLongName": "apiURL",
"paramDescription": "the FIGSHARE API id URL to retrieve all the dump files",
"paramRequired": true
}
]

View File

@ -0,0 +1,101 @@
package eu.dnetlib.dhp.collection.orcid
import eu.dnetlib.dhp.application.AbstractScalaApplication
import eu.dnetlib.dhp.collection.orcid.model.{Author, Employment, Pid, Work}
import org.apache.hadoop.io.Text
import org.apache.spark.SparkContext
import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
import org.slf4j.{Logger, LoggerFactory}
/** Spark application generating the ORCID tables (Authors, Employments and Works)
  * from the sequence files found in the source path.
  */
class SparkGenerateORCIDTable(propertyPath: String, args: Array[String], log: Logger)
    extends AbstractScalaApplication(propertyPath, args, log: Logger) {

  /** Reads the sourcePath and targetPath parameters, then generates
    * the three tables one after the other.
    */
  override def run(): Unit = {
    val sourcePath: String = parser.get("sourcePath")
    log.info("found parameters sourcePath: {}", sourcePath)
    val targetPath: String = parser.get("targetPath")
    log.info("found parameters targetPath: {}", targetPath)

    extractORCIDTable(spark, sourcePath, targetPath)
    extractORCIDEmploymentsTable(spark, sourcePath, targetPath)
    extractORCIDWorksTable(spark, sourcePath, targetPath)
  }

  /** Parses the records whose key contains "summaries" and stores the
    * authors successfully parsed under targetPath/Authors (overwrite).
    */
  def extractORCIDTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
    import spark.implicits._

    val records = spark.sparkContext
      .sequenceFile(sourcePath, classOf[Text], classOf[Text])
      .map { case (key, value) => (key.toString, value.toString) }
      .toDF
      .as[(String, String)]

    implicit val authorEncoder: Encoder[Author] = Encoders.bean(classOf[Author])

    val authors = records
      .filter(rec => rec._1.contains("summaries"))
      .map { rec =>
        new OrcidParser().parseSummary(rec._2)
      }
      .filter(author => author != null)

    authors.write
      .mode(SaveMode.Overwrite)
      .save(s"$targetPath/Authors")
  }

  /** Parses the records whose key contains "works" and stores the
    * works successfully parsed under targetPath/Works (overwrite).
    */
  def extractORCIDWorksTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
    import spark.implicits._

    val records = spark.sparkContext
      .sequenceFile(sourcePath, classOf[Text], classOf[Text])
      .map { case (key, value) => (key.toString, value.toString) }
      .toDF
      .as[(String, String)]

    implicit val workEncoder: Encoder[Work] = Encoders.bean(classOf[Work])
    implicit val pidEncoder: Encoder[Pid] = Encoders.bean(classOf[Pid])

    val works = records
      .filter(rec => rec._1.contains("works"))
      .map { rec =>
        new OrcidParser().parseWork(rec._2)
      }
      .filter(work => work != null)

    works.write
      .mode(SaveMode.Overwrite)
      .save(s"$targetPath/Works")
  }

  /** Parses the records whose key contains "employments" and stores the
    * employments successfully parsed under targetPath/Employments (overwrite).
    */
  def extractORCIDEmploymentsTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
    import spark.implicits._

    val records = spark.sparkContext
      .sequenceFile(sourcePath, classOf[Text], classOf[Text])
      .map { case (key, value) => (key.toString, value.toString) }
      .toDF
      .as[(String, String)]

    implicit val employmentEncoder: Encoder[Employment] = Encoders.bean(classOf[Employment])
    implicit val pidEncoder: Encoder[Pid] = Encoders.bean(classOf[Pid])

    val employments = records
      .filter(rec => rec._1.contains("employments"))
      .map { rec =>
        new OrcidParser().parseEmployment(rec._2)
      }
      .filter(employment => employment != null)

    employments.write
      .mode(SaveMode.Overwrite)
      .save(s"$targetPath/Employments")
  }
}
/** Entry point of the application generating the ORCID tables. */
object SparkGenerateORCIDTable {

  val log: Logger = LoggerFactory.getLogger(SparkGenerateORCIDTable.getClass)

  /** Creates the application with its parameter specification, initializes it and runs it. */
  def main(args: Array[String]): Unit = {
    val application =
      new SparkGenerateORCIDTable("/eu/dnetlib/dhp/collection/orcid/generate_orcid_table_parameter.json", args, log)
    application.initialize().run()
  }
}

View File

@ -166,7 +166,7 @@ object DataciteToOAFTransformation {
resourceTypeGeneral: String, resourceTypeGeneral: String,
schemaOrg: String, schemaOrg: String,
vocabularies: VocabularyGroup vocabularies: VocabularyGroup
): (Qualifier, Qualifier) = { ): (Qualifier, Qualifier, String) = {
if (resourceType != null && resourceType.nonEmpty) { if (resourceType != null && resourceType.nonEmpty) {
val typeQualifier = val typeQualifier =
vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType) vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType)
@ -176,7 +176,8 @@ object DataciteToOAFTransformation {
vocabularies.getSynonymAsQualifier( vocabularies.getSynonymAsQualifier(
ModelConstants.DNET_RESULT_TYPOLOGIES, ModelConstants.DNET_RESULT_TYPOLOGIES,
typeQualifier.getClassid typeQualifier.getClassid
) ),
resourceType
) )
} }
if (schemaOrg != null && schemaOrg.nonEmpty) { if (schemaOrg != null && schemaOrg.nonEmpty) {
@ -188,7 +189,8 @@ object DataciteToOAFTransformation {
vocabularies.getSynonymAsQualifier( vocabularies.getSynonymAsQualifier(
ModelConstants.DNET_RESULT_TYPOLOGIES, ModelConstants.DNET_RESULT_TYPOLOGIES,
typeQualifier.getClassid typeQualifier.getClassid
) ),
schemaOrg
) )
} }
@ -203,7 +205,8 @@ object DataciteToOAFTransformation {
vocabularies.getSynonymAsQualifier( vocabularies.getSynonymAsQualifier(
ModelConstants.DNET_RESULT_TYPOLOGIES, ModelConstants.DNET_RESULT_TYPOLOGIES,
typeQualifier.getClassid typeQualifier.getClassid
) ),
resourceTypeGeneral
) )
} }
@ -216,12 +219,18 @@ object DataciteToOAFTransformation {
schemaOrg: String, schemaOrg: String,
vocabularies: VocabularyGroup vocabularies: VocabularyGroup
): Result = { ): Result = {
val typeQualifiers: (Qualifier, Qualifier) = val typeQualifiers: (Qualifier, Qualifier, String) =
getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies) getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies)
if (typeQualifiers == null) if (typeQualifiers == null)
return null return null
val i = new Instance val i = new Instance
i.setInstancetype(typeQualifiers._1) i.setInstancetype(typeQualifiers._1)
// ADD ORIGINAL TYPE
val itm = new InstanceTypeMapping
itm.setOriginalType(typeQualifiers._3)
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
i.setInstanceTypeMapping(List(itm).asJava)
typeQualifiers._2.getClassname match { typeQualifiers._2.getClassname match {
case "dataset" => case "dataset" =>
val r = new OafDataset val r = new OafDataset

View File

@ -176,7 +176,7 @@ object BioDBToOAF {
i.setUrl(List(s"${resolvedURL(input.pidType)}${input.pid}").asJava) i.setUrl(List(s"${resolvedURL(input.pidType)}${input.pid}").asJava)
} }
if (input.pidType.equalsIgnoreCase("clinicaltrials.gov")) if (input.pidType.equalsIgnoreCase("clinicaltrials.gov")) {
i.setInstancetype( i.setInstancetype(
OafMapperUtils.qualifier( OafMapperUtils.qualifier(
"0037", "0037",
@ -185,7 +185,11 @@ object BioDBToOAF {
ModelConstants.DNET_PUBLICATION_RESOURCE ModelConstants.DNET_PUBLICATION_RESOURCE
) )
) )
else val itm = new InstanceTypeMapping
itm.setOriginalType(input.pidType)
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
i.setInstanceTypeMapping(List(itm).asJava)
} else {
i.setInstancetype( i.setInstancetype(
OafMapperUtils.qualifier( OafMapperUtils.qualifier(
"0046", "0046",
@ -194,6 +198,11 @@ object BioDBToOAF {
ModelConstants.DNET_PUBLICATION_RESOURCE ModelConstants.DNET_PUBLICATION_RESOURCE
) )
) )
val itm = new InstanceTypeMapping
itm.setOriginalType("Bioentity")
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
i.setInstanceTypeMapping(List(itm).asJava)
}
if (input.datasource == null || input.datasource.isEmpty) if (input.datasource == null || input.datasource.isEmpty)
return null return null
@ -265,6 +274,10 @@ object BioDBToOAF {
ModelConstants.DNET_PUBLICATION_RESOURCE ModelConstants.DNET_PUBLICATION_RESOURCE
) )
) )
val itm = new InstanceTypeMapping
itm.setOriginalType("Bioentity")
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
i.setInstanceTypeMapping(List(itm).asJava)
i.setCollectedfrom(collectedFromMap("uniprot")) i.setCollectedfrom(collectedFromMap("uniprot"))
d.setInstance(List(i).asJava) d.setInstance(List(i).asJava)
@ -471,6 +484,10 @@ object BioDBToOAF {
ModelConstants.DNET_PUBLICATION_RESOURCE ModelConstants.DNET_PUBLICATION_RESOURCE
) )
) )
val itm = new InstanceTypeMapping
itm.setOriginalType("Bioentity")
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
i.setInstanceTypeMapping(List(itm).asJava)
i.setCollectedfrom(collectedFromMap("pdb")) i.setCollectedfrom(collectedFromMap("pdb"))
d.setInstance(List(i).asJava) d.setInstance(List(i).asJava)
@ -571,6 +588,10 @@ object BioDBToOAF {
ModelConstants.DNET_PUBLICATION_RESOURCE ModelConstants.DNET_PUBLICATION_RESOURCE
) )
) )
val itm = new InstanceTypeMapping
itm.setOriginalType("Bioentity")
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
i.setInstanceTypeMapping(List(itm).asJava)
i.setCollectedfrom(collectedFromMap("ebi")) i.setCollectedfrom(collectedFromMap("ebi"))
d.setInstance(List(i).asJava) d.setInstance(List(i).asJava)

View File

@ -188,13 +188,24 @@ object PubMedToOaf {
val cojbCategory = val cojbCategory =
getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, ja.get.getValue) getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, ja.get.getValue)
pubmedInstance.setInstancetype(cojbCategory) pubmedInstance.setInstancetype(cojbCategory)
// ADD ORIGINAL TYPE to the publication
val itm = new InstanceTypeMapping
itm.setOriginalType(ja.get.getValue)
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
pubmedInstance.setInstanceTypeMapping(List(itm).asJava)
} else { } else {
val i_type = article.getPublicationTypes.asScala val i_type = article.getPublicationTypes.asScala
.map(s => getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)) .map(s => (s.getValue, getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)))
.find(q => q != null) .find(q => q._2 != null)
if (i_type.isDefined)
pubmedInstance.setInstancetype(i_type.get) if (i_type.isDefined) {
else pubmedInstance.setInstancetype(i_type.get._2)
// ADD ORIGINAL TYPE to the publication
val itm = new InstanceTypeMapping
itm.setOriginalType(i_type.get._1)
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
pubmedInstance.setInstanceTypeMapping(List(itm).asJava)
} else
return null return null
} }
val result = createResult(pubmedInstance.getInstancetype, vocabularies) val result = createResult(pubmedInstance.getInstancetype, vocabularies)

View File

@ -0,0 +1,119 @@
package eu.dnetlib.dhp.collection.orcid;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.Text;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.ximpleware.NavException;
import com.ximpleware.ParseException;
import com.ximpleware.XPathEvalException;
import com.ximpleware.XPathParseException;

import eu.dnetlib.dhp.collection.orcid.model.Author;
import eu.dnetlib.dhp.collection.orcid.model.ORCIDItem;
import eu.dnetlib.dhp.parser.utility.VtdException;
/**
 * Smoke tests for {@link OrcidParser}.
 * <p>
 * Each test loads one or more ORCID XML fixtures from the classpath, parses them and prints the JSON
 * serialization of the parsed object to standard output. A test fails when a fixture is missing, or when
 * parsing or serialization throws; the content of the parsed objects is not asserted.
 */
public class DownloadORCIDTest {

	private static final Logger log = LoggerFactory.getLogger(DownloadORCIDTest.class);

	/**
	 * Reads a classpath resource fully into a string.
	 * <p>
	 * The content is decoded explicitly as UTF-8 (the encoding declared by the XML fixtures) instead of relying on
	 * the platform default charset, and the underlying stream is always closed.
	 *
	 * @param path absolute classpath location of the resource
	 * @return the resource content
	 * @throws NullPointerException if no resource exists at {@code path}
	 * @throws IOException if the resource cannot be read
	 */
	private static String readResource(final String path) throws IOException {
		try (final InputStream in = Objects
			.requireNonNull(
				DownloadORCIDTest.class.getResourceAsStream(path),
				"classpath resource not found: " + path)) {
			return IOUtils.toString(in, StandardCharsets.UTF_8);
		}
	}

	/** Parses the ORCID summary fixture and prints it as JSON. */
	@Test
	public void testSummary() throws Exception {
		final String xml = readResource("/eu/dnetlib/dhp/collection/orcid/summary.xml");

		final OrcidParser parser = new OrcidParser();
		final ORCIDItem orcidItem = parser.parseSummary(xml);

		final ObjectMapper mapper = new ObjectMapper();
		System.out.println(mapper.writeValueAsString(orcidItem));
	}

	/** Parses every ORCID work fixture, in order, and prints each parsed work as JSON. */
	@Test
	public void testParsingWork() throws Exception {
		final List<String> workPaths = Arrays
			.asList(
				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-2536-4498.xml",
				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-5982-8983.xml",
				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191.xml",
				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191-similarity.xml",
				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191_contributors.xml");

		final OrcidParser parser = new OrcidParser();
		final ObjectMapper mapper = new ObjectMapper();

		// A plain loop lets checked exceptions propagate unwrapped, so a failure reports its real cause.
		for (final String path : workPaths) {
			System.out.println(mapper.writeValueAsString(parser.parseWork(readResource(path))));
		}
	}

	/** Parses every ORCID employment fixture, in order, and prints each parsed employment as JSON. */
	@Test
	public void testParsingEmployments() throws Exception {
		final List<String> employmentPaths = Arrays
			.asList(
				"/eu/dnetlib/dhp/collection/orcid/employment.xml",
				"/eu/dnetlib/dhp/collection/orcid/employment_2.xml",
				"/eu/dnetlib/dhp/collection/orcid/employment_3.xml");

		final OrcidParser parser = new OrcidParser();
		final ObjectMapper mapper = new ObjectMapper();

		for (final String path : employmentPaths) {
			System.out.println(mapper.writeValueAsString(parser.parseEmployment(readResource(path))));
		}
	}
}

View File

@ -0,0 +1,69 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<work:work xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history" xmlns:employment="http://www.orcid.org/ns/employment" xmlns:education="http://www.orcid.org/ns/education" xmlns:other-name="http://www.orcid.org/ns/other-name" xmlns:deprecated="http://www.orcid.org/ns/deprecated" xmlns:funding="http://www.orcid.org/ns/funding" xmlns:research-resource="http://www.orcid.org/ns/research-resource" xmlns:service="http://www.orcid.org/ns/service" xmlns:researcher-url="http://www.orcid.org/ns/researcher-url" xmlns:distinction="http://www.orcid.org/ns/distinction" xmlns:internal="http://www.orcid.org/ns/internal" xmlns:membership="http://www.orcid.org/ns/membership" xmlns:person="http://www.orcid.org/ns/person" xmlns:personal-details="http://www.orcid.org/ns/personal-details" xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common" xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword" xmlns:activities="http://www.orcid.org/ns/activities" xmlns:qualification="http://www.orcid.org/ns/qualification" xmlns:external-identifier="http://www.orcid.org/ns/external-identifier" xmlns:error="http://www.orcid.org/ns/error" xmlns:preferences="http://www.orcid.org/ns/preferences" xmlns:invited-position="http://www.orcid.org/ns/invited-position" xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="26448226" path="/0000-0001-5010-5001/work/26448226" visibility="public">
<common:created-date>2016-09-01T19:22:46.768Z</common:created-date>
<common:last-modified-date>2022-05-25T03:48:56.968Z</common:last-modified-date>
<common:source>
<common:source-client-id>
<common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
<common:path>0000-0002-5982-8983</common:path>
<common:host>orcid.org</common:host>
</common:source-client-id>
<common:source-name>Scopus - Elsevier</common:source-name>
<common:assertion-origin-orcid>
<common:uri>https://orcid.org/0000-0001-5010-5001</common:uri>
<common:path>0000-0001-5010-5001</common:path>
<common:host>orcid.org</common:host>
</common:assertion-origin-orcid>
<common:assertion-origin-name>Quang Nguyen</common:assertion-origin-name>
</common:source>
<work:title>
<common:title>Vision outcomes and major complications after endovascular coil embolization of ophthalmic segment aneurysms</common:title>
</work:title>
<work:journal-title>American Journal of Neuroradiology</work:journal-title>
<work:citation>
<work:citation-type>bibtex</work:citation-type>
<work:citation-value>@article{Nguyen2014,title = {Vision outcomes and major complications after endovascular coil embolization of ophthalmic segment aneurysms},journal = {American Journal of Neuroradiology},year = {2014},volume = {35},number = {11},pages = {2140-2145},author = {Durst, C. and Starke, R.M. and Gaughen, J. and Nguyen, Q. and Patrie, J. and Jensen, M.E. and Evans, A.J.}}</work:citation-value>
</work:citation>
<work:type>journal-article</work:type>
<common:publication-date>
<common:year>2014</common:year>
</common:publication-date>
<common:external-ids>
<common:external-id>
<common:external-id-type>doi</common:external-id-type>
<common:external-id-value>10.3174/ajnr.A4032</common:external-id-value>
<common:external-id-normalized transient="true">10.3174/ajnr.a4032</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
<common:external-id>
<common:external-id-type>eid</common:external-id-type>
<common:external-id-value>2-s2.0-84911865199</common:external-id-value>
<common:external-id-normalized transient="true">2-s2.0-84911865199</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<common:url>http://www.scopus.com/inward/record.url?eid=2-s2.0-84911865199&amp;partnerID=MN8TOARS</common:url>
<work:contributors>
<work:contributor>
<work:credit-name>Durst, C.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Starke, R.M.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Gaughen, J.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Nguyen, Q.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Patrie, J.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Jensen, M.E.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Evans, A.J.</work:credit-name>
</work:contributor>
</work:contributors>
</work:work>

View File

@ -0,0 +1,79 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<work:work xmlns:address="http://www.orcid.org/ns/address"
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work"
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="50101152"
path="/0000-0001-5349-4030/work/50101152" visibility="public">
<common:created-date>2018-11-01T19:49:45.562Z</common:created-date>
<common:last-modified-date>2018-11-01T19:49:45.562Z</common:last-modified-date>
<common:source>
<common:source-client-id>
<common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
<common:path>0000-0002-5982-8983</common:path>
<common:host>orcid.org</common:host>
</common:source-client-id>
<common:source-name>Scopus - Elsevier</common:source-name>
</common:source>
<work:title>
<common:title>"Calling Out" in class: Degrees of candor in addressing social injustices in
racially homogenous and heterogeneous U.S. history classrooms</common:title>
</work:title>
<work:journal-title>Journal of Social Studies Research</work:journal-title>
<work:citation>
<work:citation-type>bibtex</work:citation-type>
<work:citation-value>@article{Massaro2018,title = {{"}Calling Out{"} in class: Degrees of
candor in addressing social injustices in racially homogenous and heterogeneous U.S.
history classrooms},journal = {Journal of Social Studies Research},year = {2018},author
= {Parkhouse, H. and Massaro, V.R.}}</work:citation-value>
</work:citation>
<work:type>journal-article</work:type>
<common:publication-date>
<common:year>2018</common:year>
</common:publication-date>
<common:external-ids>
<common:external-id>
<common:external-id-type>doi</common:external-id-type>
<common:external-id-value>10.1016/j.jssr.2018.01.004</common:external-id-value>
<common:external-id-normalized transient="true"
>10.1016/j.jssr.2018.01.004</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
<common:external-id>
<common:external-id-type>eid</common:external-id-type>
<common:external-id-value>2-s2.0-85041949043</common:external-id-value>
<common:external-id-normalized transient="true"
>2-s2.0-85041949043</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<common:url>http://www.scopus.com/inward/record.url?eid=2-s2.0-85041949043&amp;partnerID=MN8TOARS</common:url>
<work:contributors>
<work:contributor>
<work:credit-name>Parkhouse, H.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Massaro, V.R.</work:credit-name>
</work:contributor>
</work:contributors>
</work:work>

View File

@ -0,0 +1,113 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<work:work xmlns:address="http://www.orcid.org/ns/address"
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work"
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
path="/0000-0003-2760-1191/work/28776099" visibility="public">
<common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
<common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
<common:path>0000-0002-9157-3431</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Europe PubMed Central</common:source-name>
</common:source>
<work:title>
<common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
ST-Segment-Elevation Myocardial Infarction.</common:title>
</work:title>
<work:citation>
<work:citation-type>formatted-unspecified</work:citation-type>
<work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
</work:citation>
<work:type>journal-article</work:type>
<common:publication-date>
<common:year>2016</common:year>
<common:month>11</common:month>
</common:publication-date>
<common:external-ids>
<common:external-id>
<common:external-id-type>pmid</common:external-id-type>
<common:external-id-value>27899851</common:external-id-value>
<common:external-id-normalized transient="true">27899851</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
<common:external-id>
<common:external-id-type>pmc</common:external-id-type>
<common:external-id-value>PMC5126442</common:external-id-value>
<common:external-id-normalized transient="true"
>PMC5126442</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<common:url>http://europepmc.org/abstract/med/27899851</common:url>
<work:contributors>
<work:contributor>
<work:credit-name>Abdel-Dayem K</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Abdel-Dayem Fake</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Eweda II</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>El-Sherbiny A</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Dimitry MO</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Nammas W</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
</work:contributors>
</work:work>

View File

@ -0,0 +1,106 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<work:work xmlns:address="http://www.orcid.org/ns/address"
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work"
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
path="/0000-0003-2760-1191/work/28776099" visibility="public">
<common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
<common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
<common:path>0000-0002-9157-3431</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Europe PubMed Central</common:source-name>
</common:source>
<work:title>
<common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
ST-Segment-Elevation Myocardial Infarction.</common:title>
</work:title>
<work:citation>
<work:citation-type>formatted-unspecified</work:citation-type>
<work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
</work:citation>
<work:type>journal-article</work:type>
<common:publication-date>
<common:year>2016</common:year>
<common:month>11</common:month>
</common:publication-date>
<common:external-ids>
<common:external-id>
<common:external-id-type>pmid</common:external-id-type>
<common:external-id-value>27899851</common:external-id-value>
<common:external-id-normalized transient="true">27899851</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
<common:external-id>
<common:external-id-type>pmc</common:external-id-type>
<common:external-id-value>PMC5126442</common:external-id-value>
<common:external-id-normalized transient="true"
>PMC5126442</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<common:url>http://europepmc.org/abstract/med/27899851</common:url>
<work:contributors>
<work:contributor>
<work:credit-name>Khair Abde Daye</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Eweda II</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>El-Sherbiny A</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Dimitry MO</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Nammas W</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
</work:contributors>
</work:work>

View File

@ -0,0 +1,101 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<work:work xmlns:address="http://www.orcid.org/ns/address"
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work"
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
path="/0000-0003-2760-1191/work/28776099" visibility="public">
<common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
<common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
<common:path>0000-0002-9157-3431</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Europe PubMed Central</common:source-name>
</common:source>
<work:title>
<common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
ST-Segment-Elevation Myocardial Infarction.</common:title>
</work:title>
<work:citation>
<work:citation-type>formatted-unspecified</work:citation-type>
<work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
</work:citation>
<work:type>journal-article</work:type>
<common:publication-date>
<common:year>2016</common:year>
<common:month>11</common:month>
</common:publication-date>
<common:external-ids>
<common:external-id>
<common:external-id-type>pmid</common:external-id-type>
<common:external-id-value>27899851</common:external-id-value>
<common:external-id-normalized transient="true">27899851</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
<common:external-id>
<common:external-id-type>pmc</common:external-id-type>
<common:external-id-value>PMC5126442</common:external-id-value>
<common:external-id-normalized transient="true"
>PMC5126442</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<common:url>http://europepmc.org/abstract/med/27899851</common:url>
<work:contributors>
<work:contributor>
<work:contributor-attributes>
<work:contributor-sequence>seq0</work:contributor-sequence>
<work:contributor-role>role0</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>creditname1</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>creditname2</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>seq2</work:contributor-sequence>
<work:contributor-role></work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>creditname3</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence></work:contributor-sequence>
<work:contributor-role>role3</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name></work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>seq4</work:contributor-sequence>
<work:contributor-role>role4</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
</work:contributors>
</work:work>

View File

@ -0,0 +1,50 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
put-code="2205087" path="/0000-0001-5010-5001/employment/2205087" display-index="0"
visibility="public">
<common:created-date>2016-09-01T19:21:05.791Z</common:created-date>
<common:last-modified-date>2016-09-01T19:21:05.791Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5010-5001</common:uri>
<common:path>0000-0001-5010-5001</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Quang Nguyen</common:source-name>
</common:source>
<common:organization>
<common:name>Beth Israel Deaconess Medical Center</common:name>
<common:address>
<common:city>Boston</common:city>
<common:region>MA</common:region>
<common:country>US</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>1859</common:disambiguated-organization-identifier>
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</employment:employment>

View File

@ -0,0 +1,55 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
put-code="6364960" path="/0000-0001-5011-3001/employment/6364960" display-index="1"
visibility="public">
<common:created-date>2018-09-03T01:46:19.474Z</common:created-date>
<common:last-modified-date>2018-09-03T01:46:19.474Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5011-3001</common:uri>
<common:path>0000-0001-5011-3001</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>zhengyan li</common:source-name>
</common:source>
<common:start-date>
<common:year>2008</common:year>
<common:month>09</common:month>
<common:day>01</common:day>
</common:start-date>
<common:organization>
<common:name>Anhui Academy of Agricultural Sciences</common:name>
<common:address>
<common:city>Hefei</common:city>
<common:region>Anhui</common:region>
<common:country>CN</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>125385</common:disambiguated-organization-identifier>
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</employment:employment>

View File

@ -0,0 +1,62 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
put-code="7210424" path="/0000-0001-5022-8001/employment/7210424" display-index="1"
visibility="public">
<common:created-date>2021-03-11T14:48:29.603Z</common:created-date>
<common:last-modified-date>2021-03-11T14:48:29.603Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5012-1001</common:uri>
<common:path>0000-0001-5012-1001</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Asma Bazzi</common:source-name>
</common:source>
<common:department-name>Pathology and Laboratory Medicine</common:department-name>
<common:role-title>Medical Laboratory Technologist</common:role-title>
<common:start-date>
<common:year>1994</common:year>
<common:month>10</common:month>
<common:day>01</common:day>
</common:start-date>
<common:end-date>
<common:year>2000</common:year>
<common:month>06</common:month>
<common:day>30</common:day>
</common:end-date>
<common:organization>
<common:name>American University of Beirut</common:name>
<common:address>
<common:city>Hamra</common:city>
<common:region>Beirut</common:region>
<common:country>LB</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>11238</common:disambiguated-organization-identifier>
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</employment:employment>

View File

@ -0,0 +1,581 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<record:record xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history" xmlns:employment="http://www.orcid.org/ns/employment" xmlns:education="http://www.orcid.org/ns/education" xmlns:other-name="http://www.orcid.org/ns/other-name" xmlns:deprecated="http://www.orcid.org/ns/deprecated" xmlns:funding="http://www.orcid.org/ns/funding" xmlns:research-resource="http://www.orcid.org/ns/research-resource" xmlns:service="http://www.orcid.org/ns/service" xmlns:researcher-url="http://www.orcid.org/ns/researcher-url" xmlns:distinction="http://www.orcid.org/ns/distinction" xmlns:internal="http://www.orcid.org/ns/internal" xmlns:membership="http://www.orcid.org/ns/membership" xmlns:person="http://www.orcid.org/ns/person" xmlns:personal-details="http://www.orcid.org/ns/personal-details" xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common" xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword" xmlns:activities="http://www.orcid.org/ns/activities" xmlns:qualification="http://www.orcid.org/ns/qualification" xmlns:external-identifier="http://www.orcid.org/ns/external-identifier" xmlns:error="http://www.orcid.org/ns/error" xmlns:preferences="http://www.orcid.org/ns/preferences" xmlns:invited-position="http://www.orcid.org/ns/invited-position" xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review" path="/0000-0001-5045-1000">
<common:orcid-identifier>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:orcid-identifier>
<preferences:preferences>
<preferences:locale>es</preferences:locale>
</preferences:preferences>
<history:history>
<history:creation-method>Direct</history:creation-method>
<history:submission-date>2023-01-17T23:50:40.215Z</history:submission-date>
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
<history:claimed>true</history:claimed>
<history:verified-email>true</history:verified-email>
<history:verified-primary-email>true</history:verified-primary-email>
</history:history>
<person:person path="/0000-0001-5045-1000/person">
<person:name visibility="public" path="0000-0001-5045-1000">
<common:created-date>2023-01-17T23:50:40.472Z</common:created-date>
<common:last-modified-date>2023-01-17T23:50:40.472Z</common:last-modified-date>
<personal-details:given-names>Patricio</personal-details:given-names>
<personal-details:family-name>Sánchez Quinchuela</personal-details:family-name>
</person:name>
<other-name:other-names path="/0000-0001-5045-1000/other-names"/>
<person:biography visibility="public" path="/0000-0001-5045-1000/biography">
<common:created-date>2023-01-19T13:47:33.653Z</common:created-date>
<common:last-modified-date>2023-01-19T13:47:33.653Z</common:last-modified-date>
<personal-details:content>Especialista de vinculación con la sociedad y docente de la Universidad de las Artes. Magister en Economía Social y Solidaria por el IAEN; Magister en Proyectos Sociales y Productivos por la UNACH. Licenciado en Artes UCE. Licenciado en Castellano y Literatura por la UNACH. Doctorando del programa de Sociología de la UNED España. Larga trayectoria vinculado a las organizaciones sociales acompañando procesos de gestión cultural, formación de liderazgos y economía solidaria.</personal-details:content>
</person:biography>
<researcher-url:researcher-urls path="/0000-0001-5045-1000/researcher-urls"/>
<email:emails path="/0000-0001-5045-1000/email"/>
<address:addresses path="/0000-0001-5045-1000/address"/>
<keyword:keywords path="/0000-0001-5045-1000/keywords"/>
<external-identifier:external-identifiers path="/0000-0001-7291-3210/external-identifiers">
<common:last-modified-date>2018-02-05T23:27:36.636Z</common:last-modified-date>
<external-identifier:external-identifier put-code="134902" visibility="public" path="/0000-0001-7291-3210/external-identifiers/134902" display-index="1">
<common:created-date>2013-03-08T03:20:39.347Z</common:created-date>
<common:last-modified-date>2018-02-05T23:27:36.636Z</common:last-modified-date>
<common:source>
<common:source-client-id>
<common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
<common:path>0000-0002-5982-8983</common:path>
<common:host>orcid.org</common:host>
</common:source-client-id>
<common:source-name>Scopus - Elsevier</common:source-name>
<common:assertion-origin-orcid>
<common:uri>https://orcid.org/0000-0001-7291-3210</common:uri>
<common:path>0000-0001-7291-3210</common:path>
<common:host>orcid.org</common:host>
</common:assertion-origin-orcid>
<common:assertion-origin-name>Paolo Manghi</common:assertion-origin-name>
</common:source>
<common:external-id-type>Scopus Author ID</common:external-id-type>
<common:external-id-value>6602255248</common:external-id-value>
<common:external-id-url>http://www.scopus.com/inward/authorDetails.url?authorID=6602255248&amp;partnerID=MN8TOARS</common:external-id-url>
<common:external-id-relationship>self</common:external-id-relationship>
</external-identifier:external-identifier>
</external-identifier:external-identifiers>
</person:person>
<activities:activities-summary path="/0000-0001-5045-1000/activities">
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
<activities:distinctions path="/0000-0001-5045-1000/distinctions">
<common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
<activities:affiliation-group>
<common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
<common:external-ids/>
<distinction:distinction-summary put-code="19395146" display-index="1" path="/0000-0001-5045-1000/distinction/19395146" visibility="public">
<common:created-date>2023-01-19T13:49:48.482Z</common:created-date>
<common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Programa de Maestría</common:department-name>
<common:role-title>Becario del programa de Maestría en Economía Social y Solidaria</common:role-title>
<common:start-date>
<common:year>2014</common:year>
<common:month>10</common:month>
<common:day>20</common:day>
</common:start-date>
<common:organization>
<common:name>Instituto de Altos Estudios Nacionales</common:name>
<common:address>
<common:city>Quito</common:city>
<common:region>Pichincha</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>https://ror.org/011g3me54</common:disambiguated-organization-identifier>
<common:disambiguation-source>ROR</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</distinction:distinction-summary>
</activities:affiliation-group>
</activities:distinctions>
<activities:educations path="/0000-0001-5045-1000/educations">
<common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
<common:external-ids/>
<education:education-summary put-code="19389331" display-index="1" path="/0000-0001-5045-1000/education/19389331" visibility="public">
<common:created-date>2023-01-18T21:41:03.175Z</common:created-date>
<common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Programa de Doctorado en Sociología</common:department-name>
<common:role-title>Doctorando del Programa de Sociología</common:role-title>
<common:start-date>
<common:year>2020</common:year>
<common:month>11</common:month>
<common:day>06</common:day>
</common:start-date>
<common:organization>
<common:name>Universidad Nacional de Educación a Distancia Facultad de Ciencias Políticas y Sociología</common:name>
<common:address>
<common:city>Madrid</common:city>
<common:region>Comunidad de Madrid</common:region>
<common:country>ES</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>223339</common:disambiguated-organization-identifier>
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</education:education-summary>
</activities:affiliation-group>
</activities:educations>
<activities:employments path="/0000-0001-5045-1000/employments">
<common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:22:21.513Z</common:last-modified-date>
<common:external-ids/>
<employment:employment-summary put-code="19379757" display-index="1" path="/0000-0001-5045-1000/employment/19379757" visibility="public">
<common:created-date>2023-01-17T23:57:08.246Z</common:created-date>
<common:last-modified-date>2023-01-18T21:22:21.513Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Dirección de Vinculación con la Sociedad</common:department-name>
<common:role-title>Especialista de Proyectos y docente</common:role-title>
<common:start-date>
<common:year>2021</common:year>
<common:month>11</common:month>
<common:day>01</common:day>
</common:start-date>
<common:organization>
<common:name>Universidad de las Artes</common:name>
<common:address>
<common:city>Guayaquil</common:city>
<common:region>Guayas</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>https://ror.org/016drwn73</common:disambiguated-organization-identifier>
<common:disambiguation-source>ROR</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</employment:employment-summary>
</activities:affiliation-group>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
<common:external-ids/>
<employment:employment-summary put-code="19389234" display-index="1" path="/0000-0001-5045-1000/employment/19389234" visibility="public">
<common:created-date>2023-01-18T21:25:07.138Z</common:created-date>
<common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Dirección de Vinculación con la Sociedad</common:department-name>
<common:role-title>Director</common:role-title>
<common:start-date>
<common:year>2019</common:year>
<common:month>11</common:month>
<common:day>05</common:day>
</common:start-date>
<common:end-date>
<common:year>2021</common:year>
<common:month>10</common:month>
<common:day>31</common:day>
</common:end-date>
<common:organization>
<common:name>Universidad Regional Amazónica IKIAM</common:name>
<common:address>
<common:city>Tena</common:city>
<common:region>Napo</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>https://ror.org/05xedqd83</common:disambiguated-organization-identifier>
<common:disambiguation-source>ROR</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
<common:url>http://ikiam.edu.ec</common:url>
</employment:employment-summary>
</activities:affiliation-group>
</activities:employments>
<activities:fundings path="/0000-0001-5045-1000/fundings"/>
<activities:invited-positions path="/0000-0001-5045-1000/invited-positions"/>
<activities:memberships path="/0000-0001-5045-1000/memberships">
<common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
<activities:affiliation-group>
<common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
<common:external-ids/>
<membership:membership-summary put-code="19927715" display-index="1" path="/0000-0001-5045-1000/membership/19927715" visibility="public">
<common:created-date>2023-03-24T18:16:09.131Z</common:created-date>
<common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Artes Escénicas</common:department-name>
<common:role-title>Miembro</common:role-title>
<common:start-date>
<common:year>2000</common:year>
<common:month>07</common:month>
<common:day>15</common:day>
</common:start-date>
<common:organization>
<common:name>Casa de la Cultura Ecuatoriana</common:name>
<common:address>
<common:city>Riobamba</common:city>
<common:region>Sierra Centro</common:region>
<common:country>EC</common:country>
</common:address>
</common:organization>
</membership:membership-summary>
</activities:affiliation-group>
</activities:memberships>
<activities:peer-reviews path="/0000-0001-5045-1000/peer-reviews"/>
<activities:qualifications path="/0000-0001-5045-1000/qualifications">
<common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:29:11.300Z</common:last-modified-date>
<common:external-ids/>
<qualification:qualification-summary put-code="19389264" display-index="1" path="/0000-0001-5045-1000/qualification/19389264" visibility="public">
<common:created-date>2023-01-18T21:29:11.300Z</common:created-date>
<common:last-modified-date>2023-01-18T21:29:11.300Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Programa de Gobernabilidad</common:department-name>
<common:role-title>Magister en Economïa Social y Solidaria</common:role-title>
<common:start-date>
<common:year>2014</common:year>
<common:month>10</common:month>
<common:day>20</common:day>
</common:start-date>
<common:end-date>
<common:year>2017</common:year>
<common:month>01</common:month>
<common:day>26</common:day>
</common:end-date>
<common:organization>
<common:name>Instituto de Altos Estudios Nacionales</common:name>
<common:address>
<common:city>Quito</common:city>
<common:region>Pichincha</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>https://ror.org/011g3me54</common:disambiguated-organization-identifier>
<common:disambiguation-source>ROR</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</qualification:qualification-summary>
</activities:affiliation-group>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:34:32.093Z</common:last-modified-date>
<common:external-ids/>
<qualification:qualification-summary put-code="19389298" display-index="1" path="/0000-0001-5045-1000/qualification/19389298" visibility="public">
<common:created-date>2023-01-18T21:34:32.093Z</common:created-date>
<common:last-modified-date>2023-01-18T21:34:32.093Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Posgrados</common:department-name>
<common:role-title>Magister en Proyectos Sociales y Productivos</common:role-title>
<common:start-date>
<common:year>2001</common:year>
<common:month>03</common:month>
<common:day>09</common:day>
</common:start-date>
<common:end-date>
<common:year>2003</common:year>
<common:month>02</common:month>
<common:day>27</common:day>
</common:end-date>
<common:organization>
<common:name>Universidad Nacional de Chimborazo</common:name>
<common:address>
<common:city>Riobamba</common:city>
<common:region>Chimborazo</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>https://ror.org/059wmd288</common:disambiguated-organization-identifier>
<common:disambiguation-source>ROR</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</qualification:qualification-summary>
</activities:affiliation-group>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
<common:external-ids/>
<qualification:qualification-summary put-code="19389353" display-index="1" path="/0000-0001-5045-1000/qualification/19389353" visibility="public">
<common:created-date>2023-01-18T21:45:07.379Z</common:created-date>
<common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Ciencias de la Educación</common:department-name>
<common:role-title>Licenciado en Ciencias de la Educación en Castellano y Literatura</common:role-title>
<common:start-date>
<common:year>1994</common:year>
<common:month>10</common:month>
<common:day>03</common:day>
</common:start-date>
<common:end-date>
<common:year>2000</common:year>
<common:month>01</common:month>
<common:day>31</common:day>
</common:end-date>
<common:organization>
<common:name>Universidad Nacional de Chimborazo</common:name>
<common:address>
<common:city>Riobamba</common:city>
<common:region>Chimborazo</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>https://ror.org/059wmd288</common:disambiguated-organization-identifier>
<common:disambiguation-source>ROR</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</qualification:qualification-summary>
</activities:affiliation-group>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:37:42.186Z</common:last-modified-date>
<common:external-ids/>
<qualification:qualification-summary put-code="19389317" display-index="1" path="/0000-0001-5045-1000/qualification/19389317" visibility="public">
<common:created-date>2023-01-18T21:37:42.186Z</common:created-date>
<common:last-modified-date>2023-01-18T21:37:42.186Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Facultad de Artes</common:department-name>
<common:role-title>Licenciado en Artes</common:role-title>
<common:start-date>
<common:year>1989</common:year>
<common:month>09</common:month>
<common:day>05</common:day>
</common:start-date>
<common:end-date>
<common:year>1997</common:year>
<common:month>08</common:month>
<common:day>07</common:day>
</common:end-date>
<common:organization>
<common:name>Universidad Central del Ecuador</common:name>
<common:address>
<common:city>Quito</common:city>
<common:region>Pichincha</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>http://dx.doi.org/10.13039/100019134</common:disambiguated-organization-identifier>
<common:disambiguation-source>FUNDREF</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</qualification:qualification-summary>
</activities:affiliation-group>
</activities:qualifications>
<activities:research-resources path="/0000-0001-5045-1000/research-resources"/>
<activities:services path="/0000-0001-5045-1000/services"/>
<activities:works path="/0000-0001-5045-1000/works">
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
<activities:group>
<common:last-modified-date>2023-06-09T22:15:12.910Z</common:last-modified-date>
<common:external-ids/>
<work:work-summary put-code="131526645" path="/0000-0001-5045-1000/work/131526645" visibility="public" display-index="1">
<common:created-date>2023-03-24T18:36:56.180Z</common:created-date>
<common:last-modified-date>2023-06-09T22:15:12.910Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<work:title>
<common:title>Experience in a non-capitalist way: solidarity funds that do not tax interest on the use of money</common:title>
</work:title>
<common:external-ids>
<common:external-id>
<common:external-id-type>isbn</common:external-id-type>
<common:external-id-value>978-9942-29-089-2</common:external-id-value>
<common:external-id-normalized transient="true">9789942290892</common:external-id-normalized>
<common:external-id-relationship>part-of</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<work:type>book-chapter</work:type>
<common:publication-date>
<common:year>2023</common:year>
<common:month>06</common:month>
<common:day>07</common:day>
</common:publication-date>
<work:journal-title>Finanzas éticas y solidarias en América Latina: diagnósticos, debates y propuestas</work:journal-title>
</work:work-summary>
</activities:group>
<activities:group>
<common:last-modified-date>2023-03-24T19:05:36.384Z</common:last-modified-date>
<common:external-ids/>
<work:work-summary put-code="131527819" path="/0000-0001-5045-1000/work/131527819" visibility="public" display-index="1">
<common:created-date>2023-03-24T19:05:36.384Z</common:created-date>
<common:last-modified-date>2023-03-24T19:05:36.384Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<work:title>
<common:title>Incidence of artistic practices in the social transformation of the territory. study of case: Hilarte Association, Guayaquil-Ecuador</common:title>
</work:title>
<common:external-ids/>
<work:type>conference-abstract</work:type>
<common:publication-date>
<common:year>2022</common:year>
<common:month>10</common:month>
<common:day>06</common:day>
</common:publication-date>
</work:work-summary>
</activities:group>
<activities:group>
<common:last-modified-date>2023-09-04T17:40:30.215Z</common:last-modified-date>
<common:external-ids>
<common:external-id>
<common:external-id-type>other-id</common:external-id-type>
<common:external-id-value>2018</common:external-id-value>
<common:external-id-normalized transient="true">2018</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<work:work-summary put-code="141716337" path="/0000-0001-5045-1000/work/141716337" visibility="public" display-index="1">
<common:created-date>2023-09-04T17:40:30.215Z</common:created-date>
<common:last-modified-date>2023-09-04T17:40:30.215Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<work:title>
<common:title>Más allá de la transferencia de conocimientos, un espacio para el interaprendizaje y el diálogo de saberes</common:title>
</work:title>
<common:external-ids>
<common:external-id>
<common:external-id-type>other-id</common:external-id-type>
<common:external-id-value>2018</common:external-id-value>
<common:external-id-normalized transient="true">2018</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<common:url>https://drive.google.com/drive/folders/1Tclz6isxGzSjTq-hfTnxe6M1nux-88wF?usp=drive_link</common:url>
<work:type>conference-poster</work:type>
<common:publication-date>
<common:year>2018</common:year>
<common:month>11</common:month>
<common:day>30</common:day>
</common:publication-date>
<work:journal-title>Más allá de la transferencia de conocimientos, un espacio para el interaprendizaje y el diálogo de saberes</work:journal-title>
</work:work-summary>
</activities:group>
<activities:group>
<common:last-modified-date>2023-03-24T18:57:10.095Z</common:last-modified-date>
<common:external-ids/>
<work:work-summary put-code="131527433" path="/0000-0001-5045-1000/work/131527433" visibility="public" display-index="1">
<common:created-date>2023-03-24T18:57:10.095Z</common:created-date>
<common:last-modified-date>2023-03-24T18:57:10.095Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<work:title>
<common:title>Promotion of the popular and solidarity economy from the state: principles and challenges in the experience of Ecuador</common:title>
</work:title>
<common:external-ids/>
<work:type>dissertation-thesis</work:type>
<common:publication-date>
<common:year>2017</common:year>
<common:month>01</common:month>
<common:day>26</common:day>
</common:publication-date>
</work:work-summary>
</activities:group>
<activities:group>
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
<common:external-ids/>
<work:work-summary put-code="141716713" path="/0000-0001-5045-1000/work/141716713" visibility="public" display-index="1">
<common:created-date>2023-09-04T17:51:57.749Z</common:created-date>
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<work:title>
<common:title>La Rebelión de los Dioses</common:title>
</work:title>
<common:external-ids/>
<common:url>https://drive.google.com/drive/folders/1Tclz6isxGzSjTq-hfTnxe6M1nux-88wF?usp=drive_link</common:url>
<work:type>registered-copyright</work:type>
<common:publication-date>
<common:year>2001</common:year>
<common:month>08</common:month>
<common:day>28</common:day>
</common:publication-date>
<work:journal-title>Editorial pedagógica freire</work:journal-title>
</work:work-summary>
</activities:group>
</activities:works>
</activities:activities-summary>
</record:record>

View File

@ -16,6 +16,10 @@
<name>filterInvisible</name> <name>filterInvisible</name>
<description>whether filter out invisible entities after merge</description> <description>whether filter out invisible entities after merge</description>
</property> </property>
<property>
<name>isLookupUrl</name>
<description>the URL address of the lookUp service</description>
</property>
<property> <property>
<name>sparkDriverMemory</name> <name>sparkDriverMemory</name>
<description>heap memory for driver process</description> <description>heap memory for driver process</description>
@ -128,6 +132,7 @@
<arg>--graphInputPath</arg><arg>${graphBasePath}</arg> <arg>--graphInputPath</arg><arg>${graphBasePath}</arg>
<arg>--checkpointPath</arg><arg>${workingPath}/grouped_entities</arg> <arg>--checkpointPath</arg><arg>${workingPath}/grouped_entities</arg>
<arg>--outputPath</arg><arg>${graphOutputPath}</arg> <arg>--outputPath</arg><arg>${graphOutputPath}</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
<arg>--filterInvisible</arg><arg>${filterInvisible}</arg> <arg>--filterInvisible</arg><arg>${filterInvisible}</arg>
</spark> </spark>
<ok to="End"/> <ok to="End"/>

View File

@ -133,32 +133,6 @@
<arg>--targetPath</arg><arg>${inputPathMAG}/dataset</arg> <arg>--targetPath</arg><arg>${inputPathMAG}/dataset</arg>
<arg>--master</arg><arg>yarn-cluster</arg> <arg>--master</arg><arg>yarn-cluster</arg>
</spark> </spark>
<ok to="PreProcessORCID"/>
<error to="Kill"/>
</action>
<!-- ORCID SECTION -->
<action name="PreProcessORCID">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>Convert ORCID to Dataset</name>
<class>eu.dnetlib.doiboost.orcid.SparkPreprocessORCID</class>
<jar>dhp-doiboost-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.sql.shuffle.partitions=3840
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${inputPathOrcid}</arg>
<arg>--workingPath</arg><arg>${workingPathOrcid}</arg>
<arg>--master</arg><arg>yarn-cluster</arg>
</spark>
<ok to="End"/> <ok to="End"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>

View File

@ -59,10 +59,10 @@
</property> </property>
<!-- ORCID Parameters --> <!-- ORCID Parameters -->
<property> <!-- <property>-->
<name>workingPathOrcid</name> <!-- <name>workingPathOrcid</name>-->
<description>the ORCID working path</description> <!-- <description>the ORCID working path</description>-->
</property> <!-- </property>-->
</parameters> </parameters>
@ -84,7 +84,6 @@
<case to="End">${wf:conf('resumeFrom') eq 'Skip'}</case> <case to="End">${wf:conf('resumeFrom') eq 'Skip'}</case>
<case to="ProcessMAG">${wf:conf('resumeFrom') eq 'PreprocessMag'}</case> <case to="ProcessMAG">${wf:conf('resumeFrom') eq 'PreprocessMag'}</case>
<case to="ProcessUW">${wf:conf('resumeFrom') eq 'PreprocessUW'}</case> <case to="ProcessUW">${wf:conf('resumeFrom') eq 'PreprocessUW'}</case>
<case to="ProcessORCID">${wf:conf('resumeFrom') eq 'ProcessORCID'}</case>
<case to="CreateDOIBoost">${wf:conf('resumeFrom') eq 'CreateDOIBoost'}</case> <case to="CreateDOIBoost">${wf:conf('resumeFrom') eq 'CreateDOIBoost'}</case>
<case to="GenerateActionSet">${wf:conf('resumeFrom') eq 'GenerateActionSet'}</case> <case to="GenerateActionSet">${wf:conf('resumeFrom') eq 'GenerateActionSet'}</case>
<default to="ConvertCrossrefToOAF"/> <default to="ConvertCrossrefToOAF"/>
@ -170,32 +169,6 @@
<arg>--targetPath</arg><arg>${workingPath}/uwPublication</arg> <arg>--targetPath</arg><arg>${workingPath}/uwPublication</arg>
<arg>--master</arg><arg>yarn-cluster</arg> <arg>--master</arg><arg>yarn-cluster</arg>
</spark> </spark>
<ok to="ProcessORCID"/>
<error to="Kill"/>
</action>
<!-- ORCID SECTION -->
<action name="ProcessORCID">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>Convert ORCID to Dataset</name>
<class>eu.dnetlib.doiboost.orcid.SparkConvertORCIDToOAF</class>
<jar>dhp-doiboost-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.sql.shuffle.partitions=3840
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--workingPath</arg><arg>${workingPathOrcid}</arg>
<arg>--targetPath</arg><arg>${workingPath}/orcidPublication</arg>
<arg>--master</arg><arg>yarn-cluster</arg>
</spark>
<ok to="CreateDOIBoost"/> <ok to="CreateDOIBoost"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>

View File

@ -66,7 +66,7 @@ object SparkGenerateDoiBoost {
Encoders.tuple(Encoders.STRING, mapEncoderPub) Encoders.tuple(Encoders.STRING, mapEncoderPub)
implicit val mapEncoderRel: Encoder[Relation] = Encoders.kryo[Relation] implicit val mapEncoderRel: Encoder[Relation] = Encoders.kryo[Relation]
logger.info("Phase 2) Join Crossref with UnpayWall") logger.info("Phase 1) Join Crossref with UnpayWall")
val crossrefPublication: Dataset[(String, Publication)] = val crossrefPublication: Dataset[(String, Publication)] =
spark.read.load(s"$workingDirPath/crossrefPublication").as[Publication].map(p => (p.getId, p)) spark.read.load(s"$workingDirPath/crossrefPublication").as[Publication].map(p => (p.getId, p))
@ -91,20 +91,10 @@ object SparkGenerateDoiBoost {
.write .write
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.save(s"$workingDirPath/firstJoin") .save(s"$workingDirPath/firstJoin")
logger.info("Phase 3) Join Result with ORCID")
val fj: Dataset[(String, Publication)] =
spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p))
val orcidPublication: Dataset[(String, Publication)] =
spark.read.load(s"$workingDirPath/orcidPublication").as[Publication].map(p => (p.getId, p))
fj.joinWith(orcidPublication, fj("_1").equalTo(orcidPublication("_1")), "left")
.map(applyMerge)
.write
.mode(SaveMode.Overwrite)
.save(s"$workingDirPath/secondJoin")
logger.info("Phase 4) Join Result with MAG") logger.info("Phase 2) Join Result with MAG")
val sj: Dataset[(String, Publication)] = val sj: Dataset[(String, Publication)] =
spark.read.load(s"$workingDirPath/secondJoin").as[Publication].map(p => (p.getId, p)) spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p))
val magPublication: Dataset[(String, Publication)] = val magPublication: Dataset[(String, Publication)] =
spark.read.load(s"$workingDirPath/magPublication").as[Publication].map(p => (p.getId, p)) spark.read.load(s"$workingDirPath/magPublication").as[Publication].map(p => (p.getId, p))

View File

@ -107,7 +107,7 @@ case object Crossref2Oaf {
.map(f => f.id) .map(f => f.id)
} }
def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = { def mappingResult(result: Result, json: JValue, cobjCategory: String, originalType: String): Result = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
//MAPPING Crossref DOI into PID //MAPPING Crossref DOI into PID
@ -283,6 +283,11 @@ case object Crossref2Oaf {
ModelConstants.DNET_PUBLICATION_RESOURCE ModelConstants.DNET_PUBLICATION_RESOURCE
) )
) )
//ADD ORIGINAL TYPE to the mapping
val itm = new InstanceTypeMapping
itm.setOriginalType(originalType)
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
instance.setInstanceTypeMapping(List(itm).asJava)
result.setResourcetype( result.setResourcetype(
OafMapperUtils.qualifier( OafMapperUtils.qualifier(
cobjCategory.substring(0, 4), cobjCategory.substring(0, 4),
@ -367,7 +372,9 @@ case object Crossref2Oaf {
objectType, objectType,
mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type") mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type")
) )
mappingResult(result, json, cOBJCategory)
val originalType = if (mappingCrossrefSubType.contains(objectType)) objectType else objectSubType
mappingResult(result, json, cOBJCategory, originalType)
if (result == null || result.getId == null) if (result == null || result.getId == null)
return List() return List()

View File

@ -14,8 +14,6 @@ import org.jetbrains.annotations.NotNull;
* @Date 06/10/23 * @Date 06/10/23
*/ */
public class QueryCommunityAPI { public class QueryCommunityAPI {
private static final String PRODUCTION_BASE_URL = "https://services.openaire.eu/openaire/";
private static final String BETA_BASE_URL = "https://beta.services.openaire.eu/openaire/";
private static String get(String geturl) throws IOException { private static String get(String geturl) throws IOException {
URL url = new URL(geturl); URL url = new URL(geturl);
@ -32,35 +30,34 @@ public class QueryCommunityAPI {
return body; return body;
} }
public static String communities(boolean production) throws IOException { public static String communities(String baseURL) throws IOException {
if (production)
return get(PRODUCTION_BASE_URL + "community/communities");
return get(BETA_BASE_URL + "community/communities");
}
public static String community(String id, boolean production) throws IOException { return get(baseURL + "communities");
if (production)
return get(PRODUCTION_BASE_URL + "community/" + id);
return get(BETA_BASE_URL + "community/" + id);
}
public static String communityDatasource(String id, boolean production) throws IOException {
if (production)
return get(PRODUCTION_BASE_URL + "community/" + id + "/contentproviders");
return (BETA_BASE_URL + "community/" + id + "/contentproviders");
} }
public static String communityPropagationOrganization(String id, boolean production) throws IOException { public static String community(String id, String baseURL) throws IOException {
if (production)
return get(PRODUCTION_BASE_URL + "community/" + id + "/propagationOrganizations"); return get(baseURL + id);
return get(BETA_BASE_URL + "community/" + id + "/propagationOrganizations");
} }
public static String communityProjects(String id, String page, String size, boolean production) throws IOException { public static String communityDatasource(String id, String baseURL) throws IOException {
if (production)
return get(PRODUCTION_BASE_URL + "community/" + id + "/projects/" + page + "/" + size); return get(baseURL + id + "/contentproviders");
return get(BETA_BASE_URL + "community/" + id + "/projects/" + page + "/" + size);
}
public static String communityPropagationOrganization(String id, String baseURL) throws IOException {
return get(baseURL + id + "/propagationOrganizations");
}
public static String communityProjects(String id, String page, String size, String baseURL) throws IOException {
return get(baseURL + id + "/projects/" + page + "/" + size);
} }
@NotNull @NotNull

View File

@ -9,6 +9,8 @@ import java.util.Map;
import java.util.Objects; import java.util.Objects;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import javax.management.Query;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -22,6 +24,7 @@ import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
import eu.dnetlib.dhp.bulktag.community.Provider; import eu.dnetlib.dhp.bulktag.community.Provider;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory; import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
import eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob;
/** /**
* @author miriam.baglioni * @author miriam.baglioni
@ -33,14 +36,14 @@ public class Utils implements Serializable {
private static final Logger log = LoggerFactory.getLogger(Utils.class); private static final Logger log = LoggerFactory.getLogger(Utils.class);
public static CommunityConfiguration getCommunityConfiguration(boolean production) throws IOException { public static CommunityConfiguration getCommunityConfiguration(String baseURL) throws IOException {
final Map<String, Community> communities = Maps.newHashMap(); final Map<String, Community> communities = Maps.newHashMap();
List<Community> validCommunities = new ArrayList<>(); List<Community> validCommunities = new ArrayList<>();
getValidCommunities(production) getValidCommunities(baseURL)
.forEach(community -> { .forEach(community -> {
try { try {
CommunityModel cm = MAPPER CommunityModel cm = MAPPER
.readValue(QueryCommunityAPI.community(community.getId(), production), CommunityModel.class); .readValue(QueryCommunityAPI.community(community.getId(), baseURL), CommunityModel.class);
validCommunities.add(getCommunity(cm)); validCommunities.add(getCommunity(cm));
} catch (IOException e) { } catch (IOException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
@ -50,7 +53,7 @@ public class Utils implements Serializable {
try { try {
DatasourceList dl = MAPPER DatasourceList dl = MAPPER
.readValue( .readValue(
QueryCommunityAPI.communityDatasource(community.getId(), production), DatasourceList.class); QueryCommunityAPI.communityDatasource(community.getId(), baseURL), DatasourceList.class);
community.setProviders(dl.stream().map(d -> { community.setProviders(dl.stream().map(d -> {
if (d.getEnabled() == null || Boolean.FALSE.equals(d.getEnabled())) if (d.getEnabled() == null || Boolean.FALSE.equals(d.getEnabled()))
return null; return null;
@ -95,9 +98,9 @@ public class Utils implements Serializable {
return c; return c;
} }
public static List<CommunityModel> getValidCommunities(boolean production) throws IOException { public static List<CommunityModel> getValidCommunities(String baseURL) throws IOException {
return MAPPER return MAPPER
.readValue(QueryCommunityAPI.communities(production), CommunitySummary.class) .readValue(QueryCommunityAPI.communities(baseURL), CommunitySummary.class)
.stream() .stream()
.filter( .filter(
community -> !community.getStatus().equals("hidden") && community -> !community.getStatus().equals("hidden") &&
@ -108,15 +111,15 @@ public class Utils implements Serializable {
/** /**
* it returns for each organization the list of associated communities * it returns for each organization the list of associated communities
*/ */
public static CommunityEntityMap getCommunityOrganization(boolean production) throws IOException { public static CommunityEntityMap getCommunityOrganization(String baseURL) throws IOException {
CommunityEntityMap organizationMap = new CommunityEntityMap(); CommunityEntityMap organizationMap = new CommunityEntityMap();
getValidCommunities(production) getValidCommunities(baseURL)
.forEach(community -> { .forEach(community -> {
String id = community.getId(); String id = community.getId();
try { try {
List<String> associatedOrgs = MAPPER List<String> associatedOrgs = MAPPER
.readValue( .readValue(
QueryCommunityAPI.communityPropagationOrganization(id, production), OrganizationList.class); QueryCommunityAPI.communityPropagationOrganization(id, baseURL), OrganizationList.class);
associatedOrgs.forEach(o -> { associatedOrgs.forEach(o -> {
if (!organizationMap if (!organizationMap
.keySet() .keySet()
@ -133,9 +136,10 @@ public class Utils implements Serializable {
return organizationMap; return organizationMap;
} }
public static CommunityEntityMap getCommunityProjects(boolean production) throws IOException { public static CommunityEntityMap getCommunityProjects(String baseURL) throws IOException {
CommunityEntityMap projectMap = new CommunityEntityMap(); CommunityEntityMap projectMap = new CommunityEntityMap();
getValidCommunities(production)
getValidCommunities(baseURL)
.forEach(community -> { .forEach(community -> {
int page = -1; int page = -1;
int size = 100; int size = 100;
@ -147,7 +151,7 @@ public class Utils implements Serializable {
.readValue( .readValue(
QueryCommunityAPI QueryCommunityAPI
.communityProjects( .communityProjects(
community.getId(), String.valueOf(page), String.valueOf(size), production), community.getId(), String.valueOf(page), String.valueOf(size), baseURL),
ContentModel.class); ContentModel.class);
if (cm.getContent().size() > 0) { if (cm.getContent().size() > 0) {
cm.getContent().forEach(p -> { cm.getContent().forEach(p -> {

View File

@ -62,8 +62,8 @@ public class SparkBulkTagJob {
final String outputPath = parser.get("outputPath"); final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath); log.info("outputPath: {}", outputPath);
final boolean production = Boolean.valueOf(parser.get("production")); final String baseURL = parser.get("baseURL");
log.info("production: {}", production); log.info("baseURL: {}", baseURL);
ProtoMap protoMappingParams = new Gson().fromJson(parser.get("pathMap"), ProtoMap.class); ProtoMap protoMappingParams = new Gson().fromJson(parser.get("pathMap"), ProtoMap.class);
log.info("pathMap: {}", new Gson().toJson(protoMappingParams)); log.info("pathMap: {}", new Gson().toJson(protoMappingParams));
@ -79,7 +79,8 @@ public class SparkBulkTagJob {
if (taggingConf != null) { if (taggingConf != null) {
cc = CommunityConfigurationFactory.newInstance(taggingConf); cc = CommunityConfigurationFactory.newInstance(taggingConf);
} else { } else {
cc = Utils.getCommunityConfiguration(production); cc = Utils.getCommunityConfiguration(baseURL);
log.info(OBJECT_MAPPER.writeValueAsString(cc));
} }
runWithSparkSession( runWithSparkSession(
@ -134,7 +135,7 @@ public class SparkBulkTagJob {
ModelSupport.entityTypes ModelSupport.entityTypes
.keySet() .keySet()
.parallelStream() .parallelStream()
.filter(e -> ModelSupport.isResult(e)) .filter(ModelSupport::isResult)
.forEach(e -> { .forEach(e -> {
removeOutputDir(spark, outputPath + e.name()); removeOutputDir(spark, outputPath + e.name());
ResultTagger resultTagger = new ResultTagger(); ResultTagger resultTagger = new ResultTagger();

View File

@ -13,7 +13,7 @@ public class Constraint implements Serializable {
private String verb; private String verb;
private String field; private String field;
private String value; private String value;
// private String element;
@JsonIgnore @JsonIgnore
private Selection selection; private Selection selection;
@ -41,6 +41,10 @@ public class Constraint implements Serializable {
this.value = value; this.value = value;
} }
//@JsonIgnore
// public void setSelection(Selection sel) {
// selection = sel;
// }
@JsonIgnore @JsonIgnore
public void setSelection(VerbResolver resolver) public void setSelection(VerbResolver resolver)
throws InvocationTargetException, NoSuchMethodException, InstantiationException, throws InvocationTargetException, NoSuchMethodException, InstantiationException,

View File

@ -82,11 +82,13 @@ public class ResultTagger implements Serializable {
// communities contains all the communities to be not added to the context // communities contains all the communities to be not added to the context
final Set<String> removeCommunities = new HashSet<>(); final Set<String> removeCommunities = new HashSet<>();
// if (conf.getRemoveConstraintsMap().keySet().size() > 0)
conf conf
.getRemoveConstraintsMap() .getRemoveConstraintsMap()
.keySet() .keySet()
.forEach( .forEach(
communityId -> { communityId -> {
// log.info("Remove constraints for " + communityId);
if (conf.getRemoveConstraintsMap().keySet().contains(communityId) && if (conf.getRemoveConstraintsMap().keySet().contains(communityId) &&
conf.getRemoveConstraintsMap().get(communityId).getCriteria() != null && conf.getRemoveConstraintsMap().get(communityId).getCriteria() != null &&
conf conf

View File

@ -1,21 +0,0 @@
package eu.dnetlib.dhp.resulttocommunityfromorganization;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
/**
 * A {@code HashMap<String, List<String>>} whose String-keyed lookup never returns {@code null}.
 *
 * <p>Note that {@link #get(String)} is an overload of {@link HashMap#get(Object)}, not an override:
 * a lookup performed through an {@code Object}-typed key (or through a {@code Map} reference)
 * still goes to the plain {@code HashMap} implementation and may return {@code null}.
 */
public class OrganizationMap extends HashMap<String, List<String>> {

	/** Creates an empty map. */
	public OrganizationMap() {
		super();
	}

	/**
	 * Looks up the list stored under {@code key}.
	 *
	 * @param key the key to look up
	 * @return the stored list, or a new empty list when the key is absent or mapped to
	 *         {@code null}; the empty list is not stored back into the map
	 */
	public List<String> get(String key) {
		final List<String> stored = super.get(key);
		return stored != null ? stored : new ArrayList<>();
	}

}

View File

@ -9,9 +9,7 @@ import java.util.*;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
@ -20,6 +18,8 @@ import org.slf4j.LoggerFactory;
import com.google.gson.Gson; import com.google.gson.Gson;
import eu.dnetlib.dhp.api.Utils;
import eu.dnetlib.dhp.api.model.CommunityEntityMap;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Relation;
@ -48,10 +48,10 @@ public class PrepareResultCommunitySet {
final String outputPath = parser.get("outputPath"); final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath); log.info("outputPath: {}", outputPath);
final OrganizationMap organizationMap = new Gson() final String baseURL = parser.get("baseURL");
.fromJson( log.info("baseURL: {}", baseURL);
parser.get("organizationtoresultcommunitymap"),
OrganizationMap.class); final CommunityEntityMap organizationMap = Utils.getCommunityOrganization(baseURL);
log.info("organizationMap: {}", new Gson().toJson(organizationMap)); log.info("organizationMap: {}", new Gson().toJson(organizationMap));
SparkConf conf = new SparkConf(); SparkConf conf = new SparkConf();
@ -70,7 +70,7 @@ public class PrepareResultCommunitySet {
SparkSession spark, SparkSession spark,
String inputPath, String inputPath,
String outputPath, String outputPath,
OrganizationMap organizationMap) { CommunityEntityMap organizationMap) {
Dataset<Relation> relation = readPath(spark, inputPath, Relation.class); Dataset<Relation> relation = readPath(spark, inputPath, Relation.class);
relation.createOrReplaceTempView("relation"); relation.createOrReplaceTempView("relation");
@ -115,7 +115,7 @@ public class PrepareResultCommunitySet {
} }
private static MapFunction<ResultOrganizations, ResultCommunityList> mapResultCommunityFn( private static MapFunction<ResultOrganizations, ResultCommunityList> mapResultCommunityFn(
OrganizationMap organizationMap) { CommunityEntityMap organizationMap) {
return value -> { return value -> {
String rId = value.getResultId(); String rId = value.getResultId();
Optional<List<String>> orgs = Optional.ofNullable(value.getMerges()); Optional<List<String>> orgs = Optional.ofNullable(value.getMerges());

View File

@ -2,7 +2,7 @@
package eu.dnetlib.dhp.resulttocommunityfromorganization; package eu.dnetlib.dhp.resulttocommunityfromorganization;
import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.PropagationConstant.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
@ -22,6 +22,7 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Context; import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
import scala.Tuple2; import scala.Tuple2;
@ -53,29 +54,14 @@ public class SparkResultToCommunityFromOrganizationJob {
final String possibleupdatespath = parser.get("preparedInfoPath"); final String possibleupdatespath = parser.get("preparedInfoPath");
log.info("preparedInfoPath: {}", possibleupdatespath); log.info("preparedInfoPath: {}", possibleupdatespath);
final String resultClassName = parser.get("resultTableName");
log.info("resultTableName: {}", resultClassName);
final Boolean saveGraph = Optional
.ofNullable(parser.get("saveGraph"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("saveGraph: {}", saveGraph);
@SuppressWarnings("unchecked")
Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
SparkConf conf = new SparkConf(); SparkConf conf = new SparkConf();
conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
runWithSparkHiveSession( runWithSparkSession(
conf, conf,
isSparkSessionManaged, isSparkSessionManaged,
spark -> { spark -> {
removeOutputDir(spark, outputPath); execPropagation(spark, inputPath, outputPath, possibleupdatespath);
if (saveGraph) {
execPropagation(spark, inputPath, outputPath, resultClazz, possibleupdatespath);
}
}); });
} }
@ -83,11 +69,18 @@ public class SparkResultToCommunityFromOrganizationJob {
SparkSession spark, SparkSession spark,
String inputPath, String inputPath,
String outputPath, String outputPath,
Class<R> resultClazz,
String possibleUpdatesPath) { String possibleUpdatesPath) {
Dataset<ResultCommunityList> possibleUpdates = readPath(spark, possibleUpdatesPath, ResultCommunityList.class); Dataset<ResultCommunityList> possibleUpdates = readPath(spark, possibleUpdatesPath, ResultCommunityList.class);
Dataset<R> result = readPath(spark, inputPath, resultClazz);
ModelSupport.entityTypes
.keySet()
.parallelStream()
.forEach(e -> {
if (ModelSupport.isResult(e)) {
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
removeOutputDir(spark, outputPath + e.name());
Dataset<R> result = readPath(spark, inputPath + e.name(), resultClazz);
result result
.joinWith( .joinWith(
@ -98,7 +91,10 @@ public class SparkResultToCommunityFromOrganizationJob {
.write() .write()
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.option("compression", "gzip") .option("compression", "gzip")
.json(outputPath); .json(outputPath + e.name());
}
});
} }
private static <R extends Result> MapFunction<Tuple2<R, ResultCommunityList>, R> resultCommunityFn() { private static <R extends Result> MapFunction<Tuple2<R, ResultCommunityList>, R> resultCommunityFn() {

View File

@ -52,11 +52,10 @@ public class PrepareResultCommunitySet {
final String outputPath = parser.get("outputPath"); final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath); log.info("outputPath: {}", outputPath);
final boolean production = Boolean.valueOf(parser.get("production")); final String baseURL = parser.get("baseURL");
log.info("production: {}", production); log.info("baseURL: {}", baseURL);
final CommunityEntityMap projectsMap = Utils.getCommunityProjects(production); final CommunityEntityMap projectsMap = Utils.getCommunityProjects(baseURL);
// log.info("projectsMap: {}", new Gson().toJson(projectsMap));
SparkConf conf = new SparkConf(); SparkConf conf = new SparkConf();

View File

@ -1,5 +1,4 @@
[ [
{ {
"paramName":"s", "paramName":"s",
"paramLongName":"sourcePath", "paramLongName":"sourcePath",
@ -12,7 +11,6 @@
"paramDescription": "the json path associated to each selection field", "paramDescription": "the json path associated to each selection field",
"paramRequired": true "paramRequired": true
}, },
{ {
"paramName": "out", "paramName": "out",
"paramLongName": "outputPath", "paramLongName": "outputPath",
@ -25,19 +23,16 @@
"paramDescription": "true if the spark session is managed, false otherwise", "paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false "paramRequired": false
}, },
{ {
"paramName": "tg", "paramName": "tg",
"paramLongName": "taggingConf", "paramLongName": "taggingConf",
"paramDescription": "this parameter is intended for testing purposes only. It is a possible tagging configuration obtained via the XQUERY. Intended to be removed", "paramDescription": "this parameter is intended for testing purposes only. It is a possible tagging configuration obtained via the XQUERY. Intended to be removed",
"paramRequired": false "paramRequired": false
}, },
{ {
"paramName": "p", "paramName": "bu",
"paramLongName": "production", "paramLongName": "baseURL",
"paramDescription": "this parameter is intended for testing purposes only. It is a possible tagging configuration obtained via the XQUERY. Intended to be removed", "paramDescription": "this parameter is to specify the api to be queried (beta or production)",
"paramRequired": true "paramRequired": false
} }
] ]

View File

@ -12,21 +12,10 @@
<name>outputPath</name> <name>outputPath</name>
<description>the output path</description> <description>the output path</description>
</property> </property>
<property> <property>
<name>postgresURL</name> <name>baseURL</name>
<description>the url of the postgress server to query</description> <description>the community API base URL</description>
</property> </property>
<property>
<name>postgresUser</name>
<description>the username to access the postgres db</description>
</property>
<property>
<name>postgresPassword</name>
<description>the postgres password</description>
</property>
</parameters> </parameters>
<global> <global>
@ -104,15 +93,13 @@
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master> <master>yarn-cluster</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>bulkTagging-publication</name> <name>bulkTagging-result</name>
<class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class> <class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar> <jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--num-executors=${sparkExecutorNumber} --num-executors=${sparkExecutorNumber}
--executor-memory=${sparkExecutorMemory} --executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores} --executor-cores=${sparkExecutorCores}
--conf spark.executor.memoryOverhead=${memoryOverhead}
--conf spark.sql.shuffle.partitions=${partitions}
--driver-memory=${sparkDriverMemory} --driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners} --conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
@ -122,14 +109,12 @@
<arg>--sourcePath</arg><arg>${sourcePath}/</arg> <arg>--sourcePath</arg><arg>${sourcePath}/</arg>
<arg>--outputPath</arg><arg>${outputPath}/</arg> <arg>--outputPath</arg><arg>${outputPath}/</arg>
<arg>--pathMap</arg><arg>${pathMap}</arg> <arg>--pathMap</arg><arg>${pathMap}</arg>
<arg>--production</arg><arg>${production}</arg> <arg>--baseURL</arg><arg>${baseURL}</arg>
</spark> </spark>
<ok to="End"/> <ok to="End"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<end name="End"/> <end name="End"/>
</workflow-app> </workflow-app>

View File

@ -1,62 +0,0 @@
(: Emits one <community> element for every profile in the ContextDSResourceType collection
   that passes the "where" filter below. Each element carries the context @id, the raw
   removeConstraints / advancedConstraints parameters, the subjects, the enabled datasources
   and the zenodo communities of that context. :)
for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType')
(: text of the context 'subject' param; tokenized on ',' in the return clause :)
let $subj := $x//CONFIGURATION/context/param[./@name='subject']/text()
(: concepts of the category whose id is "<context id>::contentproviders" :)
let $datasources := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::contentproviders')]/concept
(: concepts of the "<context id>::resultorganizations" category;
   NOTE(review): bound here but never referenced in the return clause :)
let $organizations := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::resultorganizations')]/concept
(: concepts of the "<context id>::zenodocommunities" category :)
let $communities := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::zenodocommunities')]/concept
(: text of the context 'fos' and 'sdg' params; both are tokenized on ',' and emitted as <subject> too :)
let $fos := $x//CONFIGURATION/context/param[./@name='fos']/text()
let $sdg := $x//CONFIGURATION/context/param[./@name='sdg']/text()
(: text of every 'zenodoCommunity' param found anywhere in the profile :)
let $zenodo := $x//param[./@name='zenodoCommunity']/text()
(: keep only contexts of type 'community' or 'ri' whose 'status' param is not 'hidden' :)
where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] and $x//context/param[./@name = 'status']/text() != 'hidden'
(: datasources are emitted only when their 'enabled' param is 'true'; zenodo communities come
   first from $zenodo (id only) and then from $communities (id plus selcriteria) :)
return
<community>
{ $x//CONFIGURATION/context/@id}
<removeConstraints>
{$x//CONFIGURATION/context/param[./@name='removeConstraints']/text() }
</removeConstraints>
<advancedConstraints>
{$x//CONFIGURATION/context/param[./@name='advancedConstraints']/text() }
</advancedConstraints>
<subjects>
{for $y in tokenize($subj,',')
return
<subject>{$y}</subject>}
{for $y in tokenize($fos,',')
return
<subject>{$y}</subject>}
{for $y in tokenize($sdg,',')
return
<subject>{$y}</subject>}
</subjects>
<datasources>
{for $d in $datasources
where $d/param[./@name='enabled']/text()='true'
return
<datasource>
<openaireId>
{$d//param[./@name='openaireId']/text()}
</openaireId>
<selcriteria>
{$d/param[./@name='selcriteria']/text()}
</selcriteria>
</datasource> }
</datasources>
<zenodocommunities>
{for $zc in $zenodo
return
<zenodocommunity>
<zenodoid>
{$zc}
</zenodoid>
</zenodocommunity>}
{for $zc in $communities
return
<zenodocommunity>
<zenodoid>
{$zc/param[./@name='zenodoid']/text()}
</zenodoid>
<selcriteria>
{$zc/param[./@name='selcriteria']/text()}
</selcriteria>
</zenodocommunity>}
</zenodocommunities>
</community>

View File

@ -5,24 +5,7 @@
"paramDescription": "the path of the sequencial file to read", "paramDescription": "the path of the sequencial file to read",
"paramRequired": true "paramRequired": true
}, },
{
"paramName":"h",
"paramLongName":"hive_metastore_uris",
"paramDescription": "the hive metastore uris",
"paramRequired": true
},
{
"paramName":"sg",
"paramLongName":"saveGraph",
"paramDescription": "true if the new version of the graph must be saved",
"paramRequired": false
},
{
"paramName":"test",
"paramLongName":"isTest",
"paramDescription": "true if it is executing a test",
"paramRequired": false
},
{ {
"paramName": "out", "paramName": "out",
"paramLongName": "outputPath", "paramLongName": "outputPath",
@ -35,12 +18,6 @@
"paramDescription": "true if the spark session is managed, false otherwise", "paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false "paramRequired": false
}, },
{
"paramName":"tn",
"paramLongName":"resultTableName",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{ {
"paramName": "p", "paramName": "p",
"paramLongName": "preparedInfoPath", "paramLongName": "preparedInfoPath",

View File

@ -5,12 +5,6 @@
"paramDescription": "the path of the sequencial file to read", "paramDescription": "the path of the sequencial file to read",
"paramRequired": true "paramRequired": true
}, },
{
"paramName":"ocm",
"paramLongName":"organizationtoresultcommunitymap",
"paramDescription": "the map for the association organization communities",
"paramRequired": true
},
{ {
"paramName":"h", "paramName":"h",
"paramLongName":"hive_metastore_uris", "paramLongName":"hive_metastore_uris",
@ -28,6 +22,12 @@
"paramLongName": "outputPath", "paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files", "paramDescription": "the path used to store temporary output files",
"paramRequired": true "paramRequired": true
},
{
"paramName": "bu",
"paramLongName": "baseURL",
"paramDescription": "the base URL to the community API to use",
"paramRequired": false
} }
] ]

View File

@ -4,14 +4,14 @@
<name>sourcePath</name> <name>sourcePath</name>
<description>the source path</description> <description>the source path</description>
</property> </property>
<property>
<name>organizationtoresultcommunitymap</name>
<description>organization community map</description>
</property>
<property> <property>
<name>outputPath</name> <name>outputPath</name>
<description>the output path</description> <description>the output path</description>
</property> </property>
<property>
<name>baseURL</name>
<description>the community API base URL</description>
</property>
</parameters> </parameters>
<global> <global>
@ -93,149 +93,54 @@
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.PrepareResultCommunitySet</class> <class>eu.dnetlib.dhp.resulttocommunityfromorganization.PrepareResultCommunitySet</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar> <jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCores} --executor-cores=6
--executor-memory=${sparkExecutorMemory} --executor-memory=5G
--conf spark.executor.memoryOverhead=3g
--conf spark.sql.shuffle.partitions=3284
--driver-memory=${sparkDriverMemory} --driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners} --conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg> <arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg> <arg>--outputPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg> <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--organizationtoresultcommunitymap</arg><arg>${organizationtoresultcommunitymap}</arg> <arg>--baseURL</arg><arg>${baseURL}</arg>
</spark> </spark>
<ok to="fork-join-exec-propagation"/> <ok to="exec-propagation"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<fork name="fork-join-exec-propagation"> <action name="exec-propagation">
<path start="join_propagate_publication"/>
<path start="join_propagate_dataset"/>
<path start="join_propagate_otherresearchproduct"/>
<path start="join_propagate_software"/>
</fork>
<action name="join_propagate_publication">
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>community2resultfromorganization-Publication</name> <name>community2resultfromorganization</name>
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class> <class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar> <jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCores} --executor-cores=6
--executor-memory=${sparkExecutorMemory} --executor-memory=5G
--conf spark.executor.memoryOverhead=3g
--conf spark.sql.shuffle.partitions=3284
--driver-memory=${sparkDriverMemory} --driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners} --conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts> </spark-opts>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg> <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg> <arg>--sourcePath</arg><arg>${sourcePath}/</arg>
<arg>--outputPath</arg><arg>${outputPath}/publication</arg> <arg>--outputPath</arg><arg>${outputPath}/</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
</spark> </spark>
<ok to="wait2"/> <ok to="End"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<action name="join_propagate_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>community2resultfromorganization-Dataset</name>
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
</spark>
<ok to="wait2"/>
<error to="Kill"/>
</action>
<action name="join_propagate_otherresearchproduct">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>community2resultfromorganization-ORP</name>
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
</spark>
<ok to="wait2"/>
<error to="Kill"/>
</action>
<action name="join_propagate_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>community2resultfromorganization-Software</name>
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
</spark>
<ok to="wait2"/>
<error to="Kill"/>
</action>
<join name="wait2" to="End"/>
<end name="End"/> <end name="End"/>

View File

@ -19,10 +19,10 @@
"paramRequired": true "paramRequired": true
}, },
{ {
"paramName": "p", "paramName": "bu",
"paramLongName": "production", "paramLongName": "baseURL",
"paramDescription": "the path used to store temporary output files", "paramDescription": "the path used to store temporary output files",
"paramRequired": true "paramRequired": false
} }
] ]

View File

@ -4,11 +4,14 @@
<name>sourcePath</name> <name>sourcePath</name>
<description>the source path</description> <description>the source path</description>
</property> </property>
<property> <property>
<name>outputPath</name> <name>outputPath</name>
<description>the output path</description> <description>the output path</description>
</property> </property>
<property>
<name>baseURL</name>
<description>the community API base URL</description>
</property>
</parameters> </parameters>
<global> <global>
@ -104,7 +107,7 @@
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg> <arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg> <arg>--outputPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
<arg>--production</arg><arg>${production}</arg> <arg>--baseURL</arg><arg>${baseURL}</arg>
</spark> </spark>
<ok to="exec-propagation"/> <ok to="exec-propagation"/>
<error to="Kill"/> <error to="Kill"/>

View File

@ -6,7 +6,6 @@ import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.ZENODO_COMMUNITY
import java.io.IOException; import java.io.IOException;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.HashMap;
import java.util.List; import java.util.List;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
@ -32,8 +31,6 @@ public class BulkTagJobTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static final String MOCK_IS_LOOK_UP_URL = "BASEURL:8280/is/services/isLookUp";
public static final String pathMap = "{ \"author\" : \"$['author'][*]['fullname']\"," public static final String pathMap = "{ \"author\" : \"$['author'][*]['fullname']\","
+ " \"title\" : \"$['title'][*]['value']\"," + " \"title\" : \"$['title'][*]['value']\","
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\"," + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
@ -43,7 +40,9 @@ public class BulkTagJobTest {
"\"fos\" : \"$['subject'][?(@['qualifier']['classid']=='FOS')].value\"," + "\"fos\" : \"$['subject'][?(@['qualifier']['classid']=='FOS')].value\"," +
"\"sdg\" : \"$['subject'][?(@['qualifier']['classid']=='SDG')].value\"," + "\"sdg\" : \"$['subject'][?(@['qualifier']['classid']=='SDG')].value\"," +
"\"hostedby\" : \"$['instance'][*]['hostedby']['key']\" , " + "\"hostedby\" : \"$['instance'][*]['hostedby']['key']\" , " +
"\"collectedfrom\" : \"$['instance'][*]['collectedfrom']['key']\"} "; "\"collectedfrom\" : \"$['instance'][*]['collectedfrom']['key']\"," +
"\"publisher\":\"$['publisher'].value\"," +
"\"publicationyear\":\"$['dateofacceptance'].value\"} ";
private static SparkSession spark; private static SparkSession spark;
@ -534,6 +533,7 @@ public class BulkTagJobTest {
+ "where MyD.inferenceprovenance = 'bulktagging'"; + "where MyD.inferenceprovenance = 'bulktagging'";
org.apache.spark.sql.Dataset<Row> idExplodeCommunity = spark.sql(query); org.apache.spark.sql.Dataset<Row> idExplodeCommunity = spark.sql(query);
Assertions.assertEquals(7, idExplodeCommunity.count()); Assertions.assertEquals(7, idExplodeCommunity.count());
Assertions Assertions
@ -1573,11 +1573,10 @@ public class BulkTagJobTest {
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", "-sourcePath",
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates/").getPath(), getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates/").getPath(),
"-taggingConf", taggingConf,
"-outputPath", workingDir.toString() + "/", "-outputPath", workingDir.toString() + "/",
"-production", Boolean.TRUE.toString(), // "-baseURL", "https://services.openaire.eu/openaire/community/",
"-pathMap", pathMap "-pathMap", pathMap,
"-taggingConf", taggingConf
}); });
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

View File

@ -72,15 +72,13 @@ public class ResultToCommunityJobTest {
SparkResultToCommunityFromOrganizationJob SparkResultToCommunityFromOrganizationJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", getClass() "-sourcePath", getClass()
.getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/sample") .getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/sample/")
.getPath(), .getPath(),
"-hive_metastore_uris", "",
"-saveGraph", "true", "-outputPath", workingDir.toString() + "/",
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset",
"-preparedInfoPath", preparedInfoPath "-preparedInfoPath", preparedInfoPath
}); });

View File

@ -26,7 +26,7 @@
<subjects/> <subjects/>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>re3data_____::a507cdacc5bbcc08761c92185dee5cab</openaireId> <openaireId>10|re3data_____::a507cdacc5bbcc08761c92185dee5cab</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
</datasources> </datasources>
@ -140,39 +140,39 @@
</subjects> </subjects>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>re3data_____::9ebe127e5f3a0bf401875690f3bb6b81</openaireId> <openaireId>10|re3data_____::9ebe127e5f3a0bf401875690f3bb6b81</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::c6cd4b532e12868c1d760a8d7cda6815</openaireId> <openaireId>10|doajarticles::c6cd4b532e12868c1d760a8d7cda6815</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b</openaireId> <openaireId>10|doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::6eb31d13b12bc06bbac06aef63cf33c9</openaireId> <openaireId>10|doajarticles::6eb31d13b12bc06bbac06aef63cf33c9</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::0da84e9dfdc8419576169e027baa8028</openaireId> <openaireId>10|doajarticles::0da84e9dfdc8419576169e027baa8028</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>re3data_____::84e123776089ce3c7a33db98d9cd15a8</openaireId> <openaireId>10|re3data_____::84e123776089ce3c7a33db98d9cd15a8</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>openaire____::c5502a43e76feab55dd00cf50f519125</openaireId> <openaireId>10|openaire____::c5502a43e76feab55dd00cf50f519125</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>re3data_____::a48f09c562b247a9919acfe195549b47</openaireId> <openaireId>10|re3data_____::a48f09c562b247a9919acfe195549b47</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::97275a23ca44226c9964043c8462be96</openaireId> <openaireId>10|opendoar____::97275a23ca44226c9964043c8462be96</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
</datasources> </datasources>
@ -287,55 +287,55 @@
</subjects> </subjects>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>doajarticles::8cec81178926caaca531afbd8eb5d64c</openaireId> <openaireId>10|doajarticles::8cec81178926caaca531afbd8eb5d64c</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::0f7a7f30b5400615cae1829f3e743982</openaireId> <openaireId>10|doajarticles::0f7a7f30b5400615cae1829f3e743982</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::9740f7f5af3e506d2ad2c215cdccd51a</openaireId> <openaireId>10|doajarticles::9740f7f5af3e506d2ad2c215cdccd51a</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::9f3fbaae044fa33cb7069b72935a3254</openaireId> <openaireId>10|doajarticles::9f3fbaae044fa33cb7069b72935a3254</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::cb67f33eb9819f5c624ce0313957f6b3</openaireId> <openaireId>10|doajarticles::cb67f33eb9819f5c624ce0313957f6b3</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::e21c97cbb7a209afc75703681c462906</openaireId> <openaireId>10|doajarticles::e21c97cbb7a209afc75703681c462906</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::554cde3be9e5c4588b4c4f9f503120cb</openaireId> <openaireId>10|doajarticles::554cde3be9e5c4588b4c4f9f503120cb</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>tubitakulakb::11e22f49e65b9fd11d5b144b93861a1b</openaireId> <openaireId>10|tubitakulakb::11e22f49e65b9fd11d5b144b93861a1b</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::57c5d3837da943e93b28ec4db82ec7a5</openaireId> <openaireId>10|doajarticles::57c5d3837da943e93b28ec4db82ec7a5</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::a186f5ddb8e8c7ecc992ef51cf3315b1</openaireId> <openaireId>10|doajarticles::a186f5ddb8e8c7ecc992ef51cf3315b1</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::e21c97cbb7a209afc75703681c462906</openaireId> <openaireId>10|doajarticles::e21c97cbb7a209afc75703681c462906</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::dca64612dfe0963fffc119098a319957</openaireId> <openaireId>10|doajarticles::dca64612dfe0963fffc119098a319957</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::dd70e44479f0ade25aa106aef3e87a0a</openaireId> <openaireId>10|doajarticles::dd70e44479f0ade25aa106aef3e87a0a</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
</datasources> </datasources>
@ -406,27 +406,27 @@
</subjects> </subjects>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>re3data_____::5b9bf9171d92df854cf3c520692e9122</openaireId> <openaireId>10|re3data_____::5b9bf9171d92df854cf3c520692e9122</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::c7d3de67dc77af72f6747157441252ec</openaireId> <openaireId>10|doajarticles::c7d3de67dc77af72f6747157441252ec</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>re3data_____::8515794670370f49c1d176c399c714f5</openaireId> <openaireId>10|re3data_____::8515794670370f49c1d176c399c714f5</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::d640648c84b10d425f96f11c3de468f3</openaireId> <openaireId>10|doajarticles::d640648c84b10d425f96f11c3de468f3</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a</openaireId> <openaireId>10|doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>rest________::fb1a3d4523c95e63496e3bc7ba36244b</openaireId> <openaireId>10|rest________::fb1a3d4523c95e63496e3bc7ba36244b</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
</datasources> </datasources>
@ -743,27 +743,27 @@
</subjects> </subjects>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>opendoar____::1a551829d50f1400b0dab21fdd969c04</openaireId> <openaireId>10|opendoar____::1a551829d50f1400b0dab21fdd969c04</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::49af6c4e558a7569d80eee2e035e2bd7</openaireId> <openaireId>10|opendoar____::49af6c4e558a7569d80eee2e035e2bd7</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::0266e33d3f546cb5436a10798e657d97</openaireId> <openaireId>10|opendoar____::0266e33d3f546cb5436a10798e657d97</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::fd4c2dc64ccb8496e6f1f94c85f30d06</openaireId> <openaireId>10|opendoar____::fd4c2dc64ccb8496e6f1f94c85f30d06</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::41bfd20a38bb1b0bec75acf0845530a7</openaireId> <openaireId>10|opendoar____::41bfd20a38bb1b0bec75acf0845530a7</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::87ae6fb631f7c8a627e8e28785d9992d</openaireId> <openaireId>10|opendoar____::87ae6fb631f7c8a627e8e28785d9992d</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
</datasources> </datasources>
@ -983,11 +983,11 @@
<subjects/> <subjects/>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>opendoar____::7e7757b1e12abcb736ab9a754ffb617a</openaireId> <openaireId>10|opendoar____::7e7757b1e12abcb736ab9a754ffb617a</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]}</selcriteria> <selcriteria>{"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]}</selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::96da2f590cd7246bbde0051047b0d6f7</openaireId> <openaireId>10|opendoar____::96da2f590cd7246bbde0051047b0d6f7</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]}</selcriteria> <selcriteria>{"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]}</selcriteria>
</datasource> </datasource>
</datasources> </datasources>
@ -1166,87 +1166,87 @@
</subjects> </subjects>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>doajarticles::1c5bdf8fca58937894ad1441cca99b76</openaireId> <openaireId>10|doajarticles::1c5bdf8fca58937894ad1441cca99b76</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::b37a634324a45c821687e6e80e6f53b4</openaireId> <openaireId>10|doajarticles::b37a634324a45c821687e6e80e6f53b4</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::4bf64f2a104040e4e055cd9594b2d77c</openaireId> <openaireId>10|doajarticles::4bf64f2a104040e4e055cd9594b2d77c</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::479ca537c12755d1868bbf02938a900c</openaireId> <openaireId>10|doajarticles::479ca537c12755d1868bbf02938a900c</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::55f31df96a60e2309f45b7c265fcf7a2</openaireId> <openaireId>10|doajarticles::55f31df96a60e2309f45b7c265fcf7a2</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::c52a09891a5301f9986ebbfe3761810c</openaireId> <openaireId>10|doajarticles::c52a09891a5301f9986ebbfe3761810c</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::379807bc7f6c71a227ef1651462c414c</openaireId> <openaireId>10|doajarticles::379807bc7f6c71a227ef1651462c414c</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::36069db531a00b85a2e8fb301f4bdc19</openaireId> <openaireId>10|doajarticles::36069db531a00b85a2e8fb301f4bdc19</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::b6a898da311ded96fabf49c520b80d5d</openaireId> <openaireId>10|doajarticles::b6a898da311ded96fabf49c520b80d5d</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::d0753d9180b35a271d8b4a31f449749f</openaireId> <openaireId>10|doajarticles::d0753d9180b35a271d8b4a31f449749f</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::172050a92511838393a3fe237ae47e31</openaireId> <openaireId>10|doajarticles::172050a92511838393a3fe237ae47e31</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::301ed96c62abb160a3e29796efe5c95c</openaireId> <openaireId>10|doajarticles::301ed96c62abb160a3e29796efe5c95c</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::0f4f805b3d842f2c7f1b077c3426fa59</openaireId> <openaireId>10|doajarticles::0f4f805b3d842f2c7f1b077c3426fa59</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::ba73728b84437b8d48ae287b867c7215</openaireId> <openaireId>10|doajarticles::ba73728b84437b8d48ae287b867c7215</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::86faef424d804309ccf45f692523aa48</openaireId> <openaireId>10|doajarticles::86faef424d804309ccf45f692523aa48</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::73bd758fa41671de70964c3ecba013af</openaireId> <openaireId>10|doajarticles::73bd758fa41671de70964c3ecba013af</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::e661fc0bdb24af42b740a08f0ddc6cf4</openaireId> <openaireId>10|doajarticles::e661fc0bdb24af42b740a08f0ddc6cf4</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::a6d3052047d5dbfbd43d95b4afb0f3d7</openaireId> <openaireId>10|doajarticles::a6d3052047d5dbfbd43d95b4afb0f3d7</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::ca61df07089acc53a1569bde6673d82a</openaireId> <openaireId>10|doajarticles::ca61df07089acc53a1569bde6673d82a</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::237dd6f1606600459d0297abd8ed9976</openaireId> <openaireId>10|doajarticles::237dd6f1606600459d0297abd8ed9976</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::fba6191177ede7c51ea1cdf58eae7f8b</openaireId> <openaireId>10|doajarticles::fba6191177ede7c51ea1cdf58eae7f8b</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
</datasources> </datasources>
@ -1345,87 +1345,87 @@
</subjects> </subjects>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>doajarticles::c6f0ed5fa41e98863e7c73501fe4bd6d</openaireId> <openaireId>10|doajarticles::c6f0ed5fa41e98863e7c73501fe4bd6d</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::ae4c7286c79590f19fdca670156ce816</openaireId> <openaireId>10|doajarticles::ae4c7286c79590f19fdca670156ce816</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::0f664bce92ce953e0c7a92068c46bfb3</openaireId> <openaireId>10|doajarticles::0f664bce92ce953e0c7a92068c46bfb3</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::00017183dc4c858fb77541985323a4ef</openaireId> <openaireId>10|doajarticles::00017183dc4c858fb77541985323a4ef</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::93b306f458cce3d7aaaf58c0a725f4f9</openaireId> <openaireId>10|doajarticles::93b306f458cce3d7aaaf58c0a725f4f9</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::9dbf8fbf3e9fe0fe1fc01e55fbd90bfc</openaireId> <openaireId>10|doajarticles::9dbf8fbf3e9fe0fe1fc01e55fbd90bfc</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::a2bda8785c863279bba4b8f34827b4c9</openaireId> <openaireId>10|doajarticles::a2bda8785c863279bba4b8f34827b4c9</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::019a1fcb42c3fea1c1b689df76330b58</openaireId> <openaireId>10|doajarticles::019a1fcb42c3fea1c1b689df76330b58</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::0daa8281938831e9c82bfed8b55a2975</openaireId> <openaireId>10|doajarticles::0daa8281938831e9c82bfed8b55a2975</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::f67ad6d268162079b3abd51a24468744</openaireId> <openaireId>10|doajarticles::f67ad6d268162079b3abd51a24468744</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::c6f0ed5fa41e98863e7c73501fe4bd6d</openaireId> <openaireId>10|doajarticles::c6f0ed5fa41e98863e7c73501fe4bd6d</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::ad114356e196a4a3d84dda59c720dacd</openaireId> <openaireId>10|doajarticles::ad114356e196a4a3d84dda59c720dacd</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::01e8a54fdecaaf354c67a2dd74ae7d4f</openaireId> <openaireId>10|doajarticles::01e8a54fdecaaf354c67a2dd74ae7d4f</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::449305f096b10a9464449ff2d0e10e06</openaireId> <openaireId>10|doajarticles::449305f096b10a9464449ff2d0e10e06</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::982c0c0ac378256254cce2fa6572bb6c</openaireId> <openaireId>10|doajarticles::982c0c0ac378256254cce2fa6572bb6c</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::49d6ed47138884566ce93cf0ccb12c02</openaireId> <openaireId>10|doajarticles::49d6ed47138884566ce93cf0ccb12c02</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::a98e820dbc2e8ee0fc84ab66f263267c</openaireId> <openaireId>10|doajarticles::a98e820dbc2e8ee0fc84ab66f263267c</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::50b1ce37427b36368f8f0f1317e47f83</openaireId> <openaireId>10|doajarticles::50b1ce37427b36368f8f0f1317e47f83</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::f0ec29b7450b2ac5d0ad45327eeb531a</openaireId> <openaireId>10|doajarticles::f0ec29b7450b2ac5d0ad45327eeb531a</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::d8d421d3b0349a7aaa93758b27a54e84</openaireId> <openaireId>10|doajarticles::d8d421d3b0349a7aaa93758b27a54e84</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::7ffc35ac5133da01d421ccf8af5b70bc</openaireId> <openaireId>10|doajarticles::7ffc35ac5133da01d421ccf8af5b70bc</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
</datasources> </datasources>
@ -1454,81 +1454,81 @@
</subjects> </subjects>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>opendoar____::358aee4cc897452c00244351e4d91f69</openaireId> <openaireId>10|opendoar____::358aee4cc897452c00244351e4d91f69</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>re3data_____::7b0ad08687b2c960d5aeef06f811d5e6</openaireId> <openaireId>10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>driver______::bee53aa31dc2cbb538c10c2b65fa5824</openaireId> <openaireId>10|driver______::bee53aa31dc2cbb538c10c2b65fa5824</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>openaire____::437f4b072b1aa198adcbc35910ff3b98</openaireId> <openaireId>10|openaire____::437f4b072b1aa198adcbc35910ff3b98</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>openaire____::081b82f96300b6a6e3d282bad31cb6e2</openaireId> <openaireId>10|openaire____::081b82f96300b6a6e3d282bad31cb6e2</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>openaire____::9e3be59865b2c1c335d32dae2fe7b254</openaireId> <openaireId>10|openaire____::9e3be59865b2c1c335d32dae2fe7b254</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::8b6dd7db9af49e67306feb59a8bdc52c</openaireId> <openaireId>10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>share_______::4719356ec8d7d55d3feb384ce879ad6c</openaireId> <openaireId>10|share_______::4719356ec8d7d55d3feb384ce879ad6c</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>share_______::bbd802baad85d1fd440f32a7a3a2c2b1</openaireId> <openaireId>10|share_______::bbd802baad85d1fd440f32a7a3a2c2b1</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::6f4922f45568161a8cdf4ad2299f6d23</openaireId> <openaireId>10|opendoar____::6f4922f45568161a8cdf4ad2299f6d23</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>re3data_____::7980778c78fb4cf0fab13ce2159030dc</openaireId> <openaireId>10|re3data_____::7980778c78fb4cf0fab13ce2159030dc</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCov"}]}]}</selcriteria> <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCov"}]}]}</selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>re3data_____::978378def740bbf2bfb420de868c460b</openaireId> <openaireId>10|re3data_____::978378def740bbf2bfb420de868c460b</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCov"}]}]}</selcriteria> <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCov"}]}]}</selcriteria>
</datasource> </datasource>
</datasources> </datasources>

File diff suppressed because one or more lines are too long

Some files were not shown because too many files have changed in this diff Show More