forked from D-Net/dnet-hadoop
Merge branch 'beta' into clean_license_publisher
This commit is contained in:
commit
34abd0fc43
|
@ -63,7 +63,10 @@ public class Vocabulary implements Serializable {
|
|||
}
|
||||
|
||||
public VocabularyTerm getTermBySynonym(final String syn) {
|
||||
return getTerm(synonyms.get(syn.toLowerCase()));
|
||||
return Optional
|
||||
.ofNullable(syn)
|
||||
.map(s -> getTerm(synonyms.get(s.toLowerCase())))
|
||||
.orElse(null);
|
||||
}
|
||||
|
||||
public Qualifier getTermAsQualifier(final String termId) {
|
||||
|
|
|
@ -135,6 +135,24 @@ public class VocabularyGroup implements Serializable {
|
|||
return vocs.get(vocId.toLowerCase()).getSynonymAsQualifier(syn);
|
||||
}
|
||||
|
||||
public Qualifier lookupTermBySynonym(final String vocId, final String syn) {
|
||||
return find(vocId)
|
||||
.map(
|
||||
vocabulary -> Optional
|
||||
.ofNullable(vocabulary.getTerm(syn))
|
||||
.map(
|
||||
term -> OafMapperUtils
|
||||
.qualifier(term.getId(), term.getName(), vocabulary.getId(), vocabulary.getName()))
|
||||
.orElse(
|
||||
Optional
|
||||
.ofNullable(vocabulary.getTermBySynonym(syn))
|
||||
.map(
|
||||
term -> OafMapperUtils
|
||||
.qualifier(term.getId(), term.getName(), vocabulary.getId(), vocabulary.getName()))
|
||||
.orElse(null)))
|
||||
.orElse(null);
|
||||
}
|
||||
|
||||
/**
|
||||
* getSynonymAsQualifierCaseSensitive
|
||||
*
|
||||
|
|
|
@ -119,6 +119,131 @@ public class AuthorMerger {
|
|||
});
|
||||
}
|
||||
|
||||
public static String normalizeFullName(final String fullname) {
|
||||
return nfd(fullname)
|
||||
.toLowerCase()
|
||||
// do not compact the regexes in a single expression, would cause StackOverflowError
|
||||
// in case
|
||||
// of large input strings
|
||||
.replaceAll("(\\W)+", " ")
|
||||
.replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")
|
||||
.replaceAll("(\\p{Punct})+", " ")
|
||||
.replaceAll("(\\d)+", " ")
|
||||
.replaceAll("(\\n)+", " ")
|
||||
|
||||
.trim();
|
||||
}
|
||||
|
||||
private static String authorFieldToBeCompared(Author author) {
|
||||
if (StringUtils.isNotBlank(author.getSurname())) {
|
||||
return author.getSurname();
|
||||
|
||||
}
|
||||
if (StringUtils.isNotBlank(author.getFullname())) {
|
||||
return author.getFullname();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method tries to figure out when two author are the same in the contest
|
||||
* of ORCID enrichment
|
||||
*
|
||||
* @param left Author in the OAF entity
|
||||
* @param right Author ORCID
|
||||
* @return based on a heuristic on the names of the authors if they are the same.
|
||||
*/
|
||||
public static boolean checkORCIDSimilarity(final Author left, final Author right) {
|
||||
final Person pl = parse(left);
|
||||
final Person pr = parse(right);
|
||||
|
||||
// If one of them didn't have a surname we verify if they have the fullName not empty
|
||||
// and verify if the normalized version is equal
|
||||
if (!(pl.getSurname() != null && pl.getSurname().stream().anyMatch(StringUtils::isNotBlank) &&
|
||||
pr.getSurname() != null && pr.getSurname().stream().anyMatch(StringUtils::isNotBlank))) {
|
||||
|
||||
if (pl.getFullname() != null && !pl.getFullname().isEmpty() && pr.getFullname() != null
|
||||
&& !pr.getFullname().isEmpty()) {
|
||||
return pl
|
||||
.getFullname()
|
||||
.stream()
|
||||
.anyMatch(
|
||||
fl -> pr.getFullname().stream().anyMatch(fr -> normalize(fl).equalsIgnoreCase(normalize(fr))));
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// The Authors have one surname in common
|
||||
if (pl.getSurname().stream().anyMatch(sl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(sl)))) {
|
||||
|
||||
// If one of them has only a surname and is the same we can say that they are the same author
|
||||
if ((pl.getName() == null || pl.getName().stream().allMatch(StringUtils::isBlank)) ||
|
||||
(pr.getName() == null || pr.getName().stream().allMatch(StringUtils::isBlank)))
|
||||
return true;
|
||||
// The authors have the same initials of Name in common
|
||||
if (pl
|
||||
.getName()
|
||||
.stream()
|
||||
.anyMatch(
|
||||
nl -> pr
|
||||
.getName()
|
||||
.stream()
|
||||
.anyMatch(nr -> nr.equalsIgnoreCase(nl))))
|
||||
return true;
|
||||
}
|
||||
|
||||
// Sometimes we noticed that publication have author wrote in inverse order Surname, Name
|
||||
// We verify if we have an exact match between name and surname
|
||||
if (pl.getSurname().stream().anyMatch(sl -> pr.getName().stream().anyMatch(nr -> nr.equalsIgnoreCase(sl))) &&
|
||||
pl.getName().stream().anyMatch(nl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(nl))))
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
//
|
||||
|
||||
/**
|
||||
* Method to enrich ORCID information in one list of authors based on another list
|
||||
*
|
||||
* @param baseAuthor the Author List in the OAF Entity
|
||||
* @param orcidAuthor The list of ORCID Author intersected
|
||||
* @return The Author List of the OAF Entity enriched with the orcid Author
|
||||
*/
|
||||
public static List<Author> enrichOrcid(List<Author> baseAuthor, List<Author> orcidAuthor) {
|
||||
|
||||
if (baseAuthor == null || baseAuthor.isEmpty())
|
||||
return orcidAuthor;
|
||||
|
||||
if (orcidAuthor == null || orcidAuthor.isEmpty())
|
||||
return baseAuthor;
|
||||
|
||||
if (baseAuthor.size() == 1 && orcidAuthor.size() > 10)
|
||||
return baseAuthor;
|
||||
|
||||
final List<Author> oAuthor = new ArrayList<>();
|
||||
oAuthor.addAll(orcidAuthor);
|
||||
|
||||
baseAuthor.forEach(ba -> {
|
||||
Optional<Author> aMatch = oAuthor.stream().filter(oa -> checkORCIDSimilarity(ba, oa)).findFirst();
|
||||
if (aMatch.isPresent()) {
|
||||
final Author sameAuthor = aMatch.get();
|
||||
addPid(ba, sameAuthor.getPid());
|
||||
oAuthor.remove(sameAuthor);
|
||||
}
|
||||
});
|
||||
return baseAuthor;
|
||||
}
|
||||
|
||||
private static void addPid(final Author a, final List<StructuredProperty> pids) {
|
||||
|
||||
if (a.getPid() == null) {
|
||||
a.setPid(new ArrayList<>());
|
||||
}
|
||||
|
||||
a.getPid().addAll(pids);
|
||||
|
||||
}
|
||||
|
||||
public static String pidToComparableString(StructuredProperty pid) {
|
||||
final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase()
|
||||
: "";
|
||||
|
@ -171,7 +296,7 @@ public class AuthorMerger {
|
|||
}
|
||||
}
|
||||
|
||||
private static String normalize(final String s) {
|
||||
public static String normalize(final String s) {
|
||||
String[] normalized = nfd(s)
|
||||
.toLowerCase()
|
||||
// do not compact the regexes in a single expression, would cause StackOverflowError
|
||||
|
|
|
@ -21,10 +21,15 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||
import eu.dnetlib.dhp.schema.common.EntityType;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
import scala.Tuple2;
|
||||
|
||||
/**
|
||||
|
@ -35,6 +40,12 @@ public class GroupEntitiesSparkJob {
|
|||
|
||||
private static final Encoder<OafEntity> OAFENTITY_KRYO_ENC = Encoders.kryo(OafEntity.class);
|
||||
|
||||
private ArgumentApplicationParser parser;
|
||||
|
||||
public GroupEntitiesSparkJob(ArgumentApplicationParser parser) {
|
||||
this.parser = parser;
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
||||
String jsonConfiguration = IOUtils
|
||||
|
@ -51,6 +62,17 @@ public class GroupEntitiesSparkJob {
|
|||
.orElse(Boolean.TRUE);
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
final String isLookupUrl = parser.get("isLookupUrl");
|
||||
log.info("isLookupUrl: {}", isLookupUrl);
|
||||
|
||||
final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
||||
|
||||
new GroupEntitiesSparkJob(parser).run(isSparkSessionManaged, isLookupService);
|
||||
}
|
||||
|
||||
public void run(Boolean isSparkSessionManaged, ISLookUpService isLookUpService)
|
||||
throws ISLookUpException {
|
||||
|
||||
String graphInputPath = parser.get("graphInputPath");
|
||||
log.info("graphInputPath: {}", graphInputPath);
|
||||
|
||||
|
@ -60,19 +82,21 @@ public class GroupEntitiesSparkJob {
|
|||
String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath: {}", outputPath);
|
||||
|
||||
boolean filterInvisible = Boolean.valueOf(parser.get("filterInvisible"));
|
||||
boolean filterInvisible = Boolean.parseBoolean(parser.get("filterInvisible"));
|
||||
log.info("filterInvisible: {}", filterInvisible);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
|
||||
conf.registerKryoClasses(ModelSupport.getOafModelClasses());
|
||||
|
||||
final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookUpService);
|
||||
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
HdfsSupport.remove(checkpointPath, spark.sparkContext().hadoopConfiguration());
|
||||
groupEntities(spark, graphInputPath, checkpointPath, outputPath, filterInvisible);
|
||||
groupEntities(spark, graphInputPath, checkpointPath, outputPath, filterInvisible, vocs);
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -81,7 +105,7 @@ public class GroupEntitiesSparkJob {
|
|||
String inputPath,
|
||||
String checkpointPath,
|
||||
String outputPath,
|
||||
boolean filterInvisible) {
|
||||
boolean filterInvisible, VocabularyGroup vocs) {
|
||||
|
||||
Dataset<OafEntity> allEntities = spark.emptyDataset(OAFENTITY_KRYO_ENC);
|
||||
|
||||
|
@ -106,10 +130,14 @@ public class GroupEntitiesSparkJob {
|
|||
}
|
||||
|
||||
Dataset<?> groupedEntities = allEntities
|
||||
.groupByKey((MapFunction<OafEntity, String>) OafEntity::getId, Encoders.STRING())
|
||||
.reduceGroups((ReduceFunction<OafEntity>) (b, a) -> OafMapperUtils.mergeEntities(b, a))
|
||||
.map(
|
||||
(MapFunction<Tuple2<String, OafEntity>, Tuple2<String, OafEntity>>) t -> new Tuple2(
|
||||
(MapFunction<OafEntity, OafEntity>) entity -> GraphCleaningFunctions
|
||||
.applyCoarVocabularies(entity, vocs),
|
||||
OAFENTITY_KRYO_ENC)
|
||||
.groupByKey((MapFunction<OafEntity, String>) OafEntity::getId, Encoders.STRING())
|
||||
.reduceGroups((ReduceFunction<OafEntity>) OafMapperUtils::mergeEntities)
|
||||
.map(
|
||||
(MapFunction<Tuple2<String, OafEntity>, Tuple2<String, OafEntity>>) t -> new Tuple2<>(
|
||||
t._2().getClass().getName(), t._2()),
|
||||
Encoders.tuple(Encoders.STRING(), OAFENTITY_KRYO_ENC));
|
||||
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.OPENAIRE_META_RESOURCE_TYPE;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance;
|
||||
|
||||
import java.net.MalformedURLException;
|
||||
|
@ -889,4 +891,105 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
|||
return s;
|
||||
}
|
||||
|
||||
public static OafEntity applyCoarVocabularies(OafEntity entity, VocabularyGroup vocs) {
|
||||
|
||||
if (entity instanceof Result) {
|
||||
final Result result = (Result) entity;
|
||||
|
||||
Optional
|
||||
.ofNullable(result.getInstance())
|
||||
.ifPresent(
|
||||
instances -> instances
|
||||
.forEach(
|
||||
instance -> {
|
||||
if (Objects.isNull(instance.getInstanceTypeMapping())) {
|
||||
List<InstanceTypeMapping> mapping = Lists.newArrayList();
|
||||
mapping
|
||||
.add(
|
||||
OafMapperUtils
|
||||
.instanceTypeMapping(
|
||||
instance.getInstancetype().getClassname(),
|
||||
OPENAIRE_COAR_RESOURCE_TYPES_3_1));
|
||||
instance.setInstanceTypeMapping(mapping);
|
||||
}
|
||||
Optional<InstanceTypeMapping> optionalItm = instance
|
||||
.getInstanceTypeMapping()
|
||||
.stream()
|
||||
.filter(GraphCleaningFunctions::originalResourceType)
|
||||
.findFirst();
|
||||
if (optionalItm.isPresent()) {
|
||||
InstanceTypeMapping coarItm = optionalItm.get();
|
||||
Optional
|
||||
.ofNullable(
|
||||
vocs
|
||||
.lookupTermBySynonym(
|
||||
OPENAIRE_COAR_RESOURCE_TYPES_3_1, coarItm.getOriginalType()))
|
||||
.ifPresent(type -> {
|
||||
coarItm.setTypeCode(type.getClassid());
|
||||
coarItm.setTypeLabel(type.getClassname());
|
||||
});
|
||||
final List<InstanceTypeMapping> mappings = Lists.newArrayList();
|
||||
if (vocs.vocabularyExists(OPENAIRE_USER_RESOURCE_TYPES)) {
|
||||
Optional
|
||||
.ofNullable(
|
||||
vocs
|
||||
.lookupTermBySynonym(
|
||||
OPENAIRE_USER_RESOURCE_TYPES, coarItm.getTypeCode()))
|
||||
.ifPresent(
|
||||
type -> mappings
|
||||
.add(
|
||||
OafMapperUtils
|
||||
.instanceTypeMapping(coarItm.getTypeCode(), type)));
|
||||
}
|
||||
if (!mappings.isEmpty()) {
|
||||
instance.getInstanceTypeMapping().addAll(mappings);
|
||||
}
|
||||
}
|
||||
}));
|
||||
result.setMetaResourceType(getMetaResourceType(result.getInstance(), vocs));
|
||||
}
|
||||
|
||||
return entity;
|
||||
}
|
||||
|
||||
private static boolean originalResourceType(InstanceTypeMapping itm) {
|
||||
return StringUtils.isNotBlank(itm.getOriginalType()) &&
|
||||
OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(itm.getVocabularyName()) &&
|
||||
StringUtils.isBlank(itm.getTypeCode()) &&
|
||||
StringUtils.isBlank(itm.getTypeLabel());
|
||||
}
|
||||
|
||||
private static Qualifier getMetaResourceType(final List<Instance> instances, final VocabularyGroup vocs) {
|
||||
return Optional
|
||||
.ofNullable(instances)
|
||||
.map(ii -> {
|
||||
if (vocs.vocabularyExists(OPENAIRE_META_RESOURCE_TYPE)) {
|
||||
Optional<InstanceTypeMapping> itm = ii
|
||||
.stream()
|
||||
.filter(Objects::nonNull)
|
||||
.flatMap(
|
||||
i -> Optional
|
||||
.ofNullable(i.getInstanceTypeMapping())
|
||||
.map(Collection::stream)
|
||||
.orElse(Stream.empty()))
|
||||
.filter(t -> OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(t.getVocabularyName()))
|
||||
.findFirst();
|
||||
|
||||
if (!itm.isPresent() || Objects.isNull(itm.get().getTypeCode())) {
|
||||
return null;
|
||||
} else {
|
||||
final String typeCode = itm.get().getTypeCode();
|
||||
return Optional
|
||||
.ofNullable(vocs.lookupTermBySynonym(OPENAIRE_META_RESOURCE_TYPE, typeCode))
|
||||
.orElseThrow(
|
||||
() -> new IllegalStateException("unable to find a synonym for '" + typeCode + "' in " +
|
||||
OPENAIRE_META_RESOURCE_TYPE));
|
||||
}
|
||||
} else {
|
||||
throw new IllegalStateException("vocabulary '" + OPENAIRE_META_RESOURCE_TYPE + "' not available");
|
||||
}
|
||||
})
|
||||
.orElse(null);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -14,7 +14,6 @@ import java.util.stream.Collectors;
|
|||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import eu.dnetlib.dhp.schema.common.AccessRightComparator;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
|
||||
|
@ -141,6 +140,28 @@ public class OafMapperUtils {
|
|||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public static InstanceTypeMapping instanceTypeMapping(String originalType, String code, String label,
|
||||
String vocabularyName) {
|
||||
final InstanceTypeMapping m = new InstanceTypeMapping();
|
||||
m.setVocabularyName(vocabularyName);
|
||||
m.setOriginalType(originalType);
|
||||
m.setTypeCode(code);
|
||||
m.setTypeLabel(label);
|
||||
return m;
|
||||
}
|
||||
|
||||
public static InstanceTypeMapping instanceTypeMapping(String originalType, Qualifier term) {
|
||||
return instanceTypeMapping(originalType, term.getClassid(), term.getClassname(), term.getSchemeid());
|
||||
}
|
||||
|
||||
public static InstanceTypeMapping instanceTypeMapping(String originalType) {
|
||||
return instanceTypeMapping(originalType, null, null, null);
|
||||
}
|
||||
|
||||
public static InstanceTypeMapping instanceTypeMapping(String originalType, String vocabularyName) {
|
||||
return instanceTypeMapping(originalType, null, null, vocabularyName);
|
||||
}
|
||||
|
||||
public static Qualifier unknown(final String schemeid, final String schemename) {
|
||||
return qualifier(UNKNOWN, "Unknown", schemeid, schemename);
|
||||
}
|
||||
|
|
|
@ -28,5 +28,11 @@
|
|||
"paramLongName": "filterInvisible",
|
||||
"paramDescription": "if true filters out invisible entities",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "isu",
|
||||
"paramLongName": "isLookupUrl",
|
||||
"paramDescription": "url to the ISLookup Service",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
|
@ -0,0 +1,114 @@
|
|||
|
||||
package eu.dnetlib.oa.merge;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.platform.commons.util.StringUtils;
|
||||
|
||||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.oa.merge.AuthorMerger;
|
||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||
|
||||
public class AuthorMergerTest {
|
||||
|
||||
@Test
|
||||
public void testEnrcichAuthor() throws Exception {
|
||||
final ObjectMapper mapper = new ObjectMapper();
|
||||
|
||||
BufferedReader pr = new BufferedReader(new InputStreamReader(
|
||||
Objects
|
||||
.requireNonNull(
|
||||
AuthorMergerTest.class
|
||||
.getResourceAsStream("/eu/dnetlib/dhp/oa/merge/authors_publication_sample.json"))));
|
||||
BufferedReader or = new BufferedReader(new InputStreamReader(
|
||||
Objects
|
||||
.requireNonNull(
|
||||
AuthorMergerTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/merge/authors_orcid_sample.json"))));
|
||||
|
||||
TypeReference<List<Author>> aclass = new TypeReference<List<Author>>() {
|
||||
};
|
||||
String pubLine;
|
||||
|
||||
int i = 0;
|
||||
while ((pubLine = pr.readLine()) != null) {
|
||||
final String pubId = pubLine;
|
||||
final String MatchPidOrcid = or.readLine();
|
||||
final String pubOrcid = or.readLine();
|
||||
|
||||
final String data = pr.readLine();
|
||||
|
||||
if (StringUtils.isNotBlank(data)) {
|
||||
List<Author> publicationAuthors = mapper.readValue(data, aclass);
|
||||
List<Author> orcidAuthors = mapper.readValue(or.readLine(), aclass);
|
||||
System.out.printf("OAF ID = %s \n", pubId);
|
||||
System.out.printf("ORCID Intersected ID = %s \n", pubOrcid);
|
||||
System.out.printf("OAF Author Size = %d \n", publicationAuthors.size());
|
||||
System.out.printf("Oricd Author Size = %d \n", orcidAuthors.size());
|
||||
System.out.printf("Oricd Matched PID = %s \n", MatchPidOrcid);
|
||||
|
||||
long originalAuthorWithPiD = publicationAuthors
|
||||
.stream()
|
||||
.filter(
|
||||
a -> a.getPid() != null && a
|
||||
.getPid()
|
||||
.stream()
|
||||
.anyMatch(
|
||||
p -> p.getQualifier() != null
|
||||
&& p.getQualifier().getClassid().toLowerCase().contains("orcid")))
|
||||
.count();
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
// final List<Author> enrichedList = AuthorMerger.enrichOrcid(publicationAuthors, orcidAuthors);
|
||||
final List<Author> enrichedList = AuthorMerger.enrichOrcid(publicationAuthors, orcidAuthors);
|
||||
|
||||
long enrichedAuthorWithPid = enrichedList
|
||||
.stream()
|
||||
.filter(
|
||||
a -> a.getPid() != null && a
|
||||
.getPid()
|
||||
.stream()
|
||||
.anyMatch(
|
||||
p -> p.getQualifier() != null
|
||||
&& p.getQualifier().getClassid().toLowerCase().contains("orcid")))
|
||||
.count();
|
||||
|
||||
long totalTime = (System.currentTimeMillis() - start) / 1000;
|
||||
System.out
|
||||
.printf(
|
||||
"Enriched authors in %d seconds from %d pid to %d pid \n", totalTime, originalAuthorWithPiD,
|
||||
enrichedAuthorWithPid);
|
||||
|
||||
System.out.println("=================");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void checkSimilarityTest() {
|
||||
final Author left = new Author();
|
||||
left.setName("Anand");
|
||||
left.setSurname("Rachna");
|
||||
left.setFullname("Anand, Rachna");
|
||||
|
||||
System.out.println(AuthorMerger.normalizeFullName(left.getFullname()));
|
||||
|
||||
final Author right = new Author();
|
||||
right.setName("Rachna");
|
||||
right.setSurname("Anand");
|
||||
right.setFullname("Rachna, Anand");
|
||||
// System.out.println(AuthorMerger.normalize(right.getFullname()));
|
||||
boolean same = AuthorMerger.checkORCIDSimilarity(left, right);
|
||||
|
||||
assertTrue(same);
|
||||
|
||||
}
|
||||
|
||||
}
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,102 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.orcid;
|
||||
|
||||
import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.http.client.config.RequestConfig;
|
||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||
import org.apache.http.client.methods.HttpGet;
|
||||
import org.apache.http.impl.client.CloseableHttpClient;
|
||||
import org.apache.http.impl.client.HttpClientBuilder;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
|
||||
public class DownloadORCIDDumpApplication {
|
||||
private static final Logger log = LoggerFactory.getLogger(DownloadORCIDDumpApplication.class);
|
||||
|
||||
private final FileSystem fileSystem;
|
||||
|
||||
public DownloadORCIDDumpApplication(FileSystem fileSystem) {
|
||||
this.fileSystem = fileSystem;
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
Objects
|
||||
.requireNonNull(
|
||||
DownloadORCIDDumpApplication.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/collection/orcid/download_orcid_parameter.json"))));
|
||||
argumentParser.parseArgument(args);
|
||||
|
||||
final String hdfsuri = argumentParser.get("namenode");
|
||||
log.info("hdfsURI is {}", hdfsuri);
|
||||
|
||||
final String targetPath = argumentParser.get("targetPath");
|
||||
log.info("targetPath is {}", targetPath);
|
||||
|
||||
final String apiURL = argumentParser.get("apiURL");
|
||||
log.info("apiURL is {}", apiURL);
|
||||
|
||||
final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(hdfsuri));
|
||||
|
||||
new DownloadORCIDDumpApplication(fileSystem).run(targetPath, apiURL);
|
||||
|
||||
}
|
||||
|
||||
private void downloadItem(final String name, final String itemURL, final String basePath) {
|
||||
try {
|
||||
final Path hdfsWritePath = new Path(String.format("%s/%s", basePath, name));
|
||||
final FSDataOutputStream fsDataOutputStream = fileSystem.create(hdfsWritePath, true);
|
||||
final HttpGet request = new HttpGet(itemURL);
|
||||
final int timeout = 60; // seconds
|
||||
final RequestConfig config = RequestConfig
|
||||
.custom()
|
||||
.setConnectTimeout(timeout * 1000)
|
||||
.setConnectionRequestTimeout(timeout * 1000)
|
||||
.setSocketTimeout(timeout * 1000)
|
||||
.build();
|
||||
log.info("Downloading url {} into {}", itemURL, hdfsWritePath.getName());
|
||||
try (CloseableHttpClient client = HttpClientBuilder.create().setDefaultRequestConfig(config).build();
|
||||
CloseableHttpResponse response = client.execute(request)) {
|
||||
int responseCode = response.getStatusLine().getStatusCode();
|
||||
log.info("Response code is {}", responseCode);
|
||||
if (responseCode >= 200 && responseCode < 400) {
|
||||
IOUtils.copy(response.getEntity().getContent(), fsDataOutputStream);
|
||||
}
|
||||
} catch (Throwable eu) {
|
||||
throw new RuntimeException(eu);
|
||||
}
|
||||
} catch (Throwable e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
protected void run(final String targetPath, final String apiURL) throws Exception {
|
||||
final ObjectMapper mapper = new ObjectMapper();
|
||||
final URL url = new URL(apiURL);
|
||||
URLConnection conn = url.openConnection();
|
||||
InputStream is = conn.getInputStream();
|
||||
final String json = IOUtils.toString(is);
|
||||
JsonNode jsonNode = mapper.readTree(json);
|
||||
jsonNode
|
||||
.get("files")
|
||||
.forEach(i -> downloadItem(i.get("name").asText(), i.get("download_url").asText(), targetPath));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,71 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.orcid;
|
||||
|
||||
import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.LocatedFileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.RemoteIterator;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
|
||||
public class ExtractORCIDDump {
|
||||
private static final Logger log = LoggerFactory.getLogger(ExtractORCIDDump.class);
|
||||
|
||||
private final FileSystem fileSystem;
|
||||
|
||||
public ExtractORCIDDump(FileSystem fileSystem) {
|
||||
this.fileSystem = fileSystem;
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
Objects
|
||||
.requireNonNull(
|
||||
DownloadORCIDDumpApplication.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/collection/orcid/extract_orcid_parameter.json"))));
|
||||
argumentParser.parseArgument(args);
|
||||
|
||||
final String hdfsuri = argumentParser.get("namenode");
|
||||
log.info("hdfsURI is {}", hdfsuri);
|
||||
|
||||
final String sourcePath = argumentParser.get("sourcePath");
|
||||
log.info("sourcePath is {}", sourcePath);
|
||||
|
||||
final String targetPath = argumentParser.get("targetPath");
|
||||
log.info("targetPath is {}", targetPath);
|
||||
|
||||
final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(hdfsuri));
|
||||
|
||||
new ExtractORCIDDump(fileSystem).run(sourcePath, targetPath);
|
||||
|
||||
}
|
||||
|
||||
public void run(final String sourcePath, final String targetPath) throws IOException, InterruptedException {
|
||||
RemoteIterator<LocatedFileStatus> ls = fileSystem.listFiles(new Path(sourcePath), false);
|
||||
final List<ORCIDExtractor> workers = new ArrayList<>();
|
||||
int i = 0;
|
||||
while (ls.hasNext()) {
|
||||
LocatedFileStatus current = ls.next();
|
||||
if (current.getPath().getName().endsWith("tar.gz")) {
|
||||
workers.add(new ORCIDExtractor(fileSystem, "" + i++, current.getPath(), targetPath));
|
||||
}
|
||||
}
|
||||
workers.forEach(Thread::start);
|
||||
for (ORCIDExtractor worker : workers) {
|
||||
worker.join();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,171 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.orcid;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.IOUtils;
|
||||
import org.apache.hadoop.io.SequenceFile;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.io.compress.CompressionCodec;
|
||||
import org.apache.hadoop.io.compress.CompressionCodecFactory;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**\
|
||||
* The ORCIDExtractor class extracts ORCID data from a TAR archive.
|
||||
* The class creates a map of SequenceFile.Writer objects, one for each type of data that is to be extracted (e.g., employments, works, summaries).
|
||||
* Then, it iterates over the TAR archive and writes each entry to the appropriate SequenceFile.Writer object.
|
||||
* Finally, it closes all the SequenceFile.Writer objects.
|
||||
*/
|
||||
public class ORCIDExtractor extends Thread {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(ORCIDExtractor.class);
|
||||
|
||||
private final FileSystem fileSystem;
|
||||
|
||||
private final String id;
|
||||
|
||||
private final Path sourcePath;
|
||||
|
||||
private final String baseOutputPath;
|
||||
|
||||
public ORCIDExtractor(FileSystem fileSystem, String id, Path sourcePath, String baseOutputPath) {
|
||||
this.fileSystem = fileSystem;
|
||||
this.id = id;
|
||||
this.sourcePath = sourcePath;
|
||||
this.baseOutputPath = baseOutputPath;
|
||||
}
|
||||
|
||||
/**
|
||||
* creates a map of SequenceFile.Writer objects,
|
||||
* one for each type of data that is to be extracted. The map is created based on the filename in the TAR archive.
|
||||
* For example, if the filename is employments.json, the map will contain an entry for the SequenceFile.Writer
|
||||
* object that writes employment data.
|
||||
* @return the Map
|
||||
*/
|
||||
private Map<String, SequenceFile.Writer> createMap() {
|
||||
try {
|
||||
log.info("Thread {} Creating sequence files starting from this input Path {}", id, sourcePath.getName());
|
||||
Map<String, SequenceFile.Writer> res = new HashMap<>();
|
||||
if (sourcePath.getName().contains("summaries")) {
|
||||
|
||||
final String summaryPath = String.format("%s/summaries_%s", baseOutputPath, id);
|
||||
final SequenceFile.Writer summary_file = SequenceFile
|
||||
.createWriter(
|
||||
fileSystem.getConf(),
|
||||
SequenceFile.Writer.file(new Path(summaryPath)),
|
||||
SequenceFile.Writer.keyClass(Text.class),
|
||||
SequenceFile.Writer.valueClass(Text.class));
|
||||
|
||||
log.info("Thread {} Creating only summary path here {}", id, summaryPath);
|
||||
res.put("summary", summary_file);
|
||||
return res;
|
||||
} else {
|
||||
String employmentsPath = String.format("%s/employments_%s", baseOutputPath, id);
|
||||
final SequenceFile.Writer employments_file = SequenceFile
|
||||
.createWriter(
|
||||
fileSystem.getConf(),
|
||||
SequenceFile.Writer.file(new Path(employmentsPath)),
|
||||
SequenceFile.Writer.keyClass(Text.class),
|
||||
SequenceFile.Writer.valueClass(Text.class));
|
||||
res.put("employments", employments_file);
|
||||
log.info("Thread {} Creating employments path here {}", id, employmentsPath);
|
||||
|
||||
final String worksPath = String.format("%s/works_%s", baseOutputPath, id);
|
||||
final SequenceFile.Writer works_file = SequenceFile
|
||||
.createWriter(
|
||||
fileSystem.getConf(),
|
||||
SequenceFile.Writer.file(new Path(worksPath)),
|
||||
SequenceFile.Writer.keyClass(Text.class),
|
||||
SequenceFile.Writer.valueClass(Text.class));
|
||||
res.put("works", works_file);
|
||||
log.info("Thread {} Creating works path here {}", id, worksPath);
|
||||
|
||||
return res;
|
||||
}
|
||||
} catch (Throwable e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
|
||||
CompressionCodecFactory factory = new CompressionCodecFactory(fileSystem.getConf());
|
||||
CompressionCodec codec = factory.getCodec(sourcePath);
|
||||
if (codec == null) {
|
||||
System.err.println("No codec found for " + sourcePath.getName());
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
InputStream gzipInputStream = null;
|
||||
try {
|
||||
gzipInputStream = codec.createInputStream(fileSystem.open(sourcePath));
|
||||
final Map<String, SequenceFile.Writer> fileMap = createMap();
|
||||
iterateTar(fileMap, gzipInputStream);
|
||||
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
} finally {
|
||||
log.info("Closing gzip stream");
|
||||
IOUtils.closeStream(gzipInputStream);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private SequenceFile.Writer retrieveFile(Map<String, SequenceFile.Writer> fileMap, final String path) {
|
||||
if (sourcePath.getName().contains("summaries")) {
|
||||
return fileMap.get("summary");
|
||||
}
|
||||
|
||||
if (path.contains("works")) {
|
||||
return fileMap.get("works");
|
||||
}
|
||||
if (path.contains("employments"))
|
||||
return fileMap.get("employments");
|
||||
return null;
|
||||
}
|
||||
|
||||
private void iterateTar(Map<String, SequenceFile.Writer> fileMap, InputStream gzipInputStream) throws IOException {
|
||||
|
||||
int extractedItem = 0;
|
||||
try (final TarArchiveInputStream tais = new TarArchiveInputStream(gzipInputStream)) {
|
||||
|
||||
TarArchiveEntry entry;
|
||||
while ((entry = tais.getNextTarEntry()) != null) {
|
||||
|
||||
if (entry.isFile()) {
|
||||
|
||||
final SequenceFile.Writer fl = retrieveFile(fileMap, entry.getName());
|
||||
if (fl != null) {
|
||||
final Text key = new Text(entry.getName());
|
||||
final Text value = new Text(
|
||||
org.apache.commons.io.IOUtils.toString(new BufferedReader(new InputStreamReader(tais))));
|
||||
fl.append(key, value);
|
||||
extractedItem++;
|
||||
if (extractedItem % 100000 == 0) {
|
||||
log.info("Thread {}: Extracted {} items", id, extractedItem);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
for (SequenceFile.Writer k : fileMap.values()) {
|
||||
log.info("Thread {}: Completed processed {} items", id, extractedItem);
|
||||
k.hflush();
|
||||
k.close();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1,251 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.orcid;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.ximpleware.*;
|
||||
|
||||
import eu.dnetlib.dhp.collection.orcid.model.*;
|
||||
import eu.dnetlib.dhp.parser.utility.VtdException;
|
||||
import eu.dnetlib.dhp.parser.utility.VtdUtilityParser;
|
||||
|
||||
public class OrcidParser {
|
||||
|
||||
final Logger log = LoggerFactory.getLogger(OrcidParser.class);
|
||||
private VTDNav vn;
|
||||
|
||||
private AutoPilot ap;
|
||||
private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common";
|
||||
private static final String NS_COMMON = "common";
|
||||
private static final String NS_PERSON_URL = "http://www.orcid.org/ns/person";
|
||||
private static final String NS_PERSON = "person";
|
||||
private static final String NS_DETAILS_URL = "http://www.orcid.org/ns/personal-details";
|
||||
private static final String NS_DETAILS = "personal-details";
|
||||
private static final String NS_OTHER_URL = "http://www.orcid.org/ns/other-name";
|
||||
private static final String NS_OTHER = "other-name";
|
||||
private static final String NS_RECORD_URL = "http://www.orcid.org/ns/record";
|
||||
private static final String NS_RECORD = "record";
|
||||
private static final String NS_ERROR_URL = "http://www.orcid.org/ns/error";
|
||||
private static final String NS_ACTIVITIES = "activities";
|
||||
private static final String NS_ACTIVITIES_URL = "http://www.orcid.org/ns/activities";
|
||||
private static final String NS_WORK = "work";
|
||||
private static final String NS_WORK_URL = "http://www.orcid.org/ns/work";
|
||||
|
||||
private static final String NS_ERROR = "error";
|
||||
private static final String NS_HISTORY = "history";
|
||||
private static final String NS_HISTORY_URL = "http://www.orcid.org/ns/history";
|
||||
private static final String NS_BULK_URL = "http://www.orcid.org/ns/bulk";
|
||||
private static final String NS_BULK = "bulk";
|
||||
private static final String NS_EXTERNAL = "external-identifier";
|
||||
private static final String NS_EXTERNAL_URL = "http://www.orcid.org/ns/external-identifier";
|
||||
|
||||
private void generateParsedDocument(final String xml) throws ParseException {
|
||||
final VTDGen vg = new VTDGen();
|
||||
vg.setDoc(xml.getBytes());
|
||||
vg.parse(true);
|
||||
this.vn = vg.getNav();
|
||||
this.ap = new AutoPilot(vn);
|
||||
ap.declareXPathNameSpace(NS_COMMON, NS_COMMON_URL);
|
||||
ap.declareXPathNameSpace(NS_PERSON, NS_PERSON_URL);
|
||||
ap.declareXPathNameSpace(NS_DETAILS, NS_DETAILS_URL);
|
||||
ap.declareXPathNameSpace(NS_OTHER, NS_OTHER_URL);
|
||||
ap.declareXPathNameSpace(NS_RECORD, NS_RECORD_URL);
|
||||
ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL);
|
||||
ap.declareXPathNameSpace(NS_HISTORY, NS_HISTORY_URL);
|
||||
ap.declareXPathNameSpace(NS_WORK, NS_WORK_URL);
|
||||
ap.declareXPathNameSpace(NS_EXTERNAL, NS_EXTERNAL_URL);
|
||||
ap.declareXPathNameSpace(NS_ACTIVITIES, NS_ACTIVITIES_URL);
|
||||
}
|
||||
|
||||
public Author parseSummary(final String xml) {
|
||||
|
||||
try {
|
||||
final Author author = new Author();
|
||||
generateParsedDocument(xml);
|
||||
List<VtdUtilityParser.Node> recordNodes = VtdUtilityParser
|
||||
.getTextValuesWithAttributes(
|
||||
ap, vn, "//record:record", Arrays.asList("path"));
|
||||
if (!recordNodes.isEmpty()) {
|
||||
final String oid = (recordNodes.get(0).getAttributes().get("path")).substring(1);
|
||||
author.setOrcid(oid);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
List<VtdUtilityParser.Node> personNodes = VtdUtilityParser
|
||||
.getTextValuesWithAttributes(
|
||||
ap, vn, "//person:name", Arrays.asList("visibility"));
|
||||
final String visibility = (personNodes.get(0).getAttributes().get("visibility"));
|
||||
author.setVisibility(visibility);
|
||||
final String name = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:given-names");
|
||||
author.setGivenName(name);
|
||||
|
||||
final String surnames = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:family-name");
|
||||
author.setFamilyName(surnames);
|
||||
|
||||
final String creditNames = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:credit-name");
|
||||
author.setCreditName(creditNames);
|
||||
|
||||
final String biography = VtdUtilityParser
|
||||
.getSingleValue(ap, vn, "//person:biography/personal-details:content");
|
||||
author.setBiography(biography);
|
||||
|
||||
final List<String> otherNames = VtdUtilityParser.getTextValue(ap, vn, "//other-name:content");
|
||||
if (!otherNames.isEmpty()) {
|
||||
author.setOtherNames(otherNames);
|
||||
}
|
||||
|
||||
ap.selectXPath("//external-identifier:external-identifier");
|
||||
|
||||
while (ap.evalXPath() != -1) {
|
||||
final Pid pid = new Pid();
|
||||
|
||||
final AutoPilot ap1 = new AutoPilot(ap.getNav());
|
||||
|
||||
ap1.selectXPath("./common:external-id-type");
|
||||
while (ap1.evalXPath() != -1) {
|
||||
int it = vn.getText();
|
||||
pid.setSchema(vn.toNormalizedString(it));
|
||||
}
|
||||
ap1.selectXPath("./common:external-id-value");
|
||||
while (ap1.evalXPath() != -1) {
|
||||
int it = vn.getText();
|
||||
pid.setValue(vn.toNormalizedString(it));
|
||||
}
|
||||
|
||||
author.addOtherPid(pid);
|
||||
}
|
||||
|
||||
return author;
|
||||
} catch (Throwable e) {
|
||||
log.error("Error on parsing {}", xml);
|
||||
log.error(e.getMessage());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public Work parseWork(final String xml) {
|
||||
|
||||
try {
|
||||
final Work work = new Work();
|
||||
generateParsedDocument(xml);
|
||||
List<VtdUtilityParser.Node> workNodes = VtdUtilityParser
|
||||
.getTextValuesWithAttributes(ap, vn, "//work:work", Arrays.asList("path", "visibility"));
|
||||
if (!workNodes.isEmpty()) {
|
||||
final String oid = (workNodes.get(0).getAttributes().get("path")).split("/")[1];
|
||||
work.setOrcid(oid);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
|
||||
ap.selectXPath("//common:external-id");
|
||||
|
||||
while (ap.evalXPath() != -1) {
|
||||
final Pid pid = new Pid();
|
||||
|
||||
final AutoPilot ap1 = new AutoPilot(ap.getNav());
|
||||
|
||||
ap1.selectXPath("./common:external-id-type");
|
||||
while (ap1.evalXPath() != -1) {
|
||||
int it = vn.getText();
|
||||
pid.setSchema(vn.toNormalizedString(it));
|
||||
}
|
||||
ap1.selectXPath("./common:external-id-value");
|
||||
while (ap1.evalXPath() != -1) {
|
||||
int it = vn.getText();
|
||||
pid.setValue(vn.toNormalizedString(it));
|
||||
}
|
||||
|
||||
work.addPid(pid);
|
||||
}
|
||||
|
||||
work.setTitle(VtdUtilityParser.getSingleValue(ap, vn, "//work:title/common:title"));
|
||||
|
||||
return work;
|
||||
} catch (Throwable e) {
|
||||
log.error("Error on parsing {}", xml);
|
||||
log.error(e.getMessage());
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private String extractEmploymentDate(final String xpath) throws Exception {
|
||||
|
||||
ap.selectXPath(xpath);
|
||||
StringBuilder sb = new StringBuilder();
|
||||
while (ap.evalXPath() != -1) {
|
||||
final AutoPilot ap1 = new AutoPilot(ap.getNav());
|
||||
ap1.selectXPath("./common:year");
|
||||
while (ap1.evalXPath() != -1) {
|
||||
int it = vn.getText();
|
||||
sb.append(vn.toNormalizedString(it));
|
||||
}
|
||||
ap1.selectXPath("./common:month");
|
||||
while (ap1.evalXPath() != -1) {
|
||||
int it = vn.getText();
|
||||
sb.append("-");
|
||||
sb.append(vn.toNormalizedString(it));
|
||||
}
|
||||
ap1.selectXPath("./common:day");
|
||||
while (ap1.evalXPath() != -1) {
|
||||
int it = vn.getText();
|
||||
sb.append("-");
|
||||
sb.append(vn.toNormalizedString(it));
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
|
||||
}
|
||||
|
||||
public Employment parseEmployment(final String xml) {
|
||||
try {
|
||||
final Employment employment = new Employment();
|
||||
generateParsedDocument(xml);
|
||||
final String oid = VtdUtilityParser
|
||||
.getSingleValue(ap, vn, "//common:source-orcid/common:path");
|
||||
if (StringUtils.isNotBlank(oid)) {
|
||||
employment.setOrcid(oid);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
final String depName = VtdUtilityParser
|
||||
.getSingleValue(ap, vn, "//common:department-name");
|
||||
final String rolTitle = VtdUtilityParser
|
||||
.getSingleValue(ap, vn, "//common:role-title");
|
||||
if (StringUtils.isNotBlank(rolTitle))
|
||||
employment.setRoleTitle(rolTitle);
|
||||
if (StringUtils.isNotBlank(depName))
|
||||
employment.setDepartmentName(depName);
|
||||
else
|
||||
employment
|
||||
.setDepartmentName(
|
||||
VtdUtilityParser
|
||||
.getSingleValue(ap, vn, "//common:organization/common:name"));
|
||||
|
||||
employment.setStartDate(extractEmploymentDate("//common:start-date"));
|
||||
employment.setEndDate(extractEmploymentDate("//common:end-date"));
|
||||
|
||||
final String affiliationId = VtdUtilityParser
|
||||
.getSingleValue(ap, vn, "//common:disambiguated-organization-identifier");
|
||||
final String affiliationIdType = VtdUtilityParser
|
||||
.getSingleValue(ap, vn, "//common:disambiguation-source");
|
||||
|
||||
if (StringUtils.isNotBlank(affiliationId) || StringUtils.isNotBlank(affiliationIdType))
|
||||
employment.setAffiliationId(new Pid(affiliationId, affiliationIdType));
|
||||
|
||||
return employment;
|
||||
} catch (Throwable e) {
|
||||
log.error("Error on parsing {}", xml);
|
||||
log.error(e.getMessage());
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,83 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.orcid.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class Author extends ORCIDItem {
|
||||
private String givenName;
|
||||
private String familyName;
|
||||
|
||||
private String visibility;
|
||||
|
||||
private String creditName;
|
||||
|
||||
private List<String> otherNames;
|
||||
|
||||
private List<Pid> otherPids;
|
||||
|
||||
private String biography;
|
||||
|
||||
public String getBiography() {
|
||||
return biography;
|
||||
}
|
||||
|
||||
public void setBiography(String biography) {
|
||||
this.biography = biography;
|
||||
}
|
||||
|
||||
public String getGivenName() {
|
||||
return givenName;
|
||||
}
|
||||
|
||||
public void setGivenName(String givenName) {
|
||||
this.givenName = givenName;
|
||||
}
|
||||
|
||||
public String getFamilyName() {
|
||||
return familyName;
|
||||
}
|
||||
|
||||
public void setFamilyName(String familyName) {
|
||||
this.familyName = familyName;
|
||||
}
|
||||
|
||||
public String getCreditName() {
|
||||
return creditName;
|
||||
}
|
||||
|
||||
public void setCreditName(String creditName) {
|
||||
this.creditName = creditName;
|
||||
}
|
||||
|
||||
public List<String> getOtherNames() {
|
||||
return otherNames;
|
||||
}
|
||||
|
||||
public void setOtherNames(List<String> otherNames) {
|
||||
this.otherNames = otherNames;
|
||||
}
|
||||
|
||||
public String getVisibility() {
|
||||
return visibility;
|
||||
}
|
||||
|
||||
public void setVisibility(String visibility) {
|
||||
this.visibility = visibility;
|
||||
}
|
||||
|
||||
public List<Pid> getOtherPids() {
|
||||
return otherPids;
|
||||
}
|
||||
|
||||
public void setOtherPids(List<Pid> otherPids) {
|
||||
this.otherPids = otherPids;
|
||||
}
|
||||
|
||||
public void addOtherPid(final Pid pid) {
|
||||
|
||||
if (otherPids == null)
|
||||
otherPids = new ArrayList<>();
|
||||
otherPids.add(pid);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.orcid.model;
|
||||
|
||||
public class Employment extends ORCIDItem {
|
||||
|
||||
private String startDate;
|
||||
private String EndDate;
|
||||
|
||||
private Pid affiliationId;
|
||||
|
||||
private String departmentName;
|
||||
|
||||
private String roleTitle;
|
||||
|
||||
public String getStartDate() {
|
||||
return startDate;
|
||||
}
|
||||
|
||||
public void setStartDate(String startDate) {
|
||||
this.startDate = startDate;
|
||||
}
|
||||
|
||||
public String getEndDate() {
|
||||
return EndDate;
|
||||
}
|
||||
|
||||
public void setEndDate(String endDate) {
|
||||
EndDate = endDate;
|
||||
}
|
||||
|
||||
public Pid getAffiliationId() {
|
||||
return affiliationId;
|
||||
}
|
||||
|
||||
public void setAffiliationId(Pid affiliationId) {
|
||||
this.affiliationId = affiliationId;
|
||||
}
|
||||
|
||||
public String getDepartmentName() {
|
||||
return departmentName;
|
||||
}
|
||||
|
||||
public void setDepartmentName(String departmentName) {
|
||||
this.departmentName = departmentName;
|
||||
}
|
||||
|
||||
public String getRoleTitle() {
|
||||
return roleTitle;
|
||||
}
|
||||
|
||||
public void setRoleTitle(String roleTitle) {
|
||||
this.roleTitle = roleTitle;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.orcid.model;
|
||||
|
||||
public class ORCIDItem {
|
||||
private String orcid;
|
||||
|
||||
public String getOrcid() {
|
||||
return orcid;
|
||||
}
|
||||
|
||||
public void setOrcid(String orcid) {
|
||||
this.orcid = orcid;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.orcid.model;
|
||||
|
||||
public class Pid {
|
||||
|
||||
private String value;
|
||||
|
||||
private String schema;
|
||||
|
||||
public Pid() {
|
||||
}
|
||||
|
||||
public Pid(String value, String schema) {
|
||||
this.value = value;
|
||||
this.schema = schema;
|
||||
}
|
||||
|
||||
public String getValue() {
|
||||
return value;
|
||||
}
|
||||
|
||||
public void setValue(String value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
public String getSchema() {
|
||||
return schema;
|
||||
}
|
||||
|
||||
public void setSchema(String schema) {
|
||||
this.schema = schema;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.orcid.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class Work extends ORCIDItem {
|
||||
|
||||
private String title;
|
||||
|
||||
private List<Pid> pids;
|
||||
|
||||
public String getTitle() {
|
||||
return title;
|
||||
}
|
||||
|
||||
public void setTitle(String title) {
|
||||
this.title = title;
|
||||
}
|
||||
|
||||
public List<Pid> getPids() {
|
||||
return pids;
|
||||
}
|
||||
|
||||
public void setPids(List<Pid> pids) {
|
||||
this.pids = pids;
|
||||
}
|
||||
|
||||
public void addPid(Pid pid) {
|
||||
if (pids == null)
|
||||
pids = new ArrayList<>();
|
||||
pids.add(pid);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
[
|
||||
{
|
||||
"paramName": "n",
|
||||
"paramLongName": "namenode",
|
||||
"paramDescription": "the Name Node URI",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "t",
|
||||
"paramLongName": "targetPath",
|
||||
"paramDescription": "the target PATH where download the files",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "a",
|
||||
"paramLongName": "apiURL",
|
||||
"paramDescription": "the FIGSHARE API id URL to retrieve all the dump files",
|
||||
"paramRequired": true
|
||||
}
|
||||
|
||||
]
|
|
@ -0,0 +1,21 @@
|
|||
[
|
||||
{
|
||||
"paramName": "n",
|
||||
"paramLongName": "namenode",
|
||||
"paramDescription": "the Name Node URI",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "t",
|
||||
"paramLongName": "targetPath",
|
||||
"paramDescription": "the target PATH to extract files",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "s",
|
||||
"paramLongName": "sourcePath",
|
||||
"paramDescription": "the PATH where the tar.gz files were downloaded",
|
||||
"paramRequired": true
|
||||
}
|
||||
|
||||
]
|
|
@ -0,0 +1,21 @@
|
|||
[
|
||||
{
|
||||
"paramName": "m",
|
||||
"paramLongName": "master",
|
||||
"paramDescription": "the master name",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "t",
|
||||
"paramLongName": "targetPath",
|
||||
"paramDescription": "the target PATH of the DF tables",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "s",
|
||||
"paramLongName": "sourcePath",
|
||||
"paramDescription": "the PATH of the ORCID sequence file",
|
||||
"paramRequired": true
|
||||
}
|
||||
|
||||
]
|
|
@ -0,0 +1,23 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -0,0 +1,81 @@
|
|||
<workflow-app name="download_ORCID_DUMP" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>targetPath</name>
|
||||
<description>the path to store the original ORCID dump</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>apiURL</name>
|
||||
<description>The figshare API URL to retrieve the list file to download</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<start to="generateTables"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="DownloadDUMP">
|
||||
<java>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
</configuration>
|
||||
<main-class>eu.dnetlib.dhp.collection.orcid.DownloadORCIDDumpApplication</main-class>
|
||||
<arg>--namenode</arg><arg>${nameNode}</arg>
|
||||
<arg>--targetPath</arg><arg>${targetPath}</arg>
|
||||
<arg>--apiURL</arg><arg>${apiURL}</arg>
|
||||
</java>
|
||||
<ok to="extractDump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="extractDump">
|
||||
<java>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
</configuration>
|
||||
|
||||
<main-class>eu.dnetlib.dhp.collection.orcid.ExtractORCIDDump</main-class>
|
||||
<java-opts> -Xmx6g </java-opts>
|
||||
<arg>--namenode</arg><arg>${nameNode}</arg>
|
||||
<arg>--sourcePath</arg><arg>${targetPath}</arg>
|
||||
<arg>--targetPath</arg><arg>${targetPath}/extracted</arg>
|
||||
</java>
|
||||
<ok to="generateTables"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="generateTables">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Generate ORCID Tables</name>
|
||||
<class>eu.dnetlib.dhp.collection.orcid.SparkGenerateORCIDTable</class>
|
||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=2g
|
||||
--conf spark.sql.shuffle.partitions=3000
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${targetPath}/extracted</arg>
|
||||
<arg>--targetPath</arg><arg>${targetPath}/tables</arg>
|
||||
<arg>--master</arg><arg>yarn</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
|
@ -0,0 +1,21 @@
|
|||
[
|
||||
{
|
||||
"paramName": "n",
|
||||
"paramLongName": "namenode",
|
||||
"paramDescription": "the Name Node URI",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "t",
|
||||
"paramLongName": "targetPath",
|
||||
"paramDescription": "the target PATH where download the files",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "a",
|
||||
"paramLongName": "apiURL",
|
||||
"paramDescription": "the FIGSHARE API id URL to retrieve all the dump files",
|
||||
"paramRequired": true
|
||||
}
|
||||
|
||||
]
|
|
@ -0,0 +1,101 @@
|
|||
package eu.dnetlib.dhp.collection.orcid
|
||||
|
||||
import eu.dnetlib.dhp.application.AbstractScalaApplication
|
||||
import eu.dnetlib.dhp.collection.orcid.model.{Author, Employment, Pid, Work}
|
||||
import org.apache.hadoop.io.Text
|
||||
import org.apache.spark.SparkContext
|
||||
import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
|
||||
class SparkGenerateORCIDTable(propertyPath: String, args: Array[String], log: Logger)
|
||||
extends AbstractScalaApplication(propertyPath, args, log: Logger) {
|
||||
|
||||
/** Here all the spark applications runs this method
|
||||
* where the whole logic of the spark node is defined
|
||||
*/
|
||||
override def run(): Unit = {
|
||||
val sourcePath: String = parser.get("sourcePath")
|
||||
log.info("found parameters sourcePath: {}", sourcePath)
|
||||
val targetPath: String = parser.get("targetPath")
|
||||
log.info("found parameters targetPath: {}", targetPath)
|
||||
extractORCIDTable(spark, sourcePath, targetPath)
|
||||
extractORCIDEmploymentsTable(spark, sourcePath, targetPath)
|
||||
extractORCIDWorksTable(spark, sourcePath, targetPath)
|
||||
}
|
||||
|
||||
def extractORCIDTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
|
||||
val sc: SparkContext = spark.sparkContext
|
||||
import spark.implicits._
|
||||
val df = sc
|
||||
.sequenceFile(sourcePath, classOf[Text], classOf[Text])
|
||||
.map { case (x, y) => (x.toString, y.toString) }
|
||||
.toDF
|
||||
.as[(String, String)]
|
||||
implicit val orcidAuthor: Encoder[Author] = Encoders.bean(classOf[Author])
|
||||
// implicit val orcidPID:Encoder[Pid] = Encoders.bean(classOf[Pid])
|
||||
df.filter(r => r._1.contains("summaries"))
|
||||
.map { r =>
|
||||
val p = new OrcidParser
|
||||
p.parseSummary(r._2)
|
||||
}
|
||||
.filter(p => p != null)
|
||||
.write
|
||||
.mode(SaveMode.Overwrite)
|
||||
.save(s"$targetPath/Authors")
|
||||
}
|
||||
|
||||
def extractORCIDWorksTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
|
||||
val sc: SparkContext = spark.sparkContext
|
||||
import spark.implicits._
|
||||
val df = sc
|
||||
.sequenceFile(sourcePath, classOf[Text], classOf[Text])
|
||||
.map { case (x, y) => (x.toString, y.toString) }
|
||||
.toDF
|
||||
.as[(String, String)]
|
||||
implicit val orcidWorkAuthor: Encoder[Work] = Encoders.bean(classOf[Work])
|
||||
implicit val orcidPID: Encoder[Pid] = Encoders.bean(classOf[Pid])
|
||||
df.filter(r => r._1.contains("works"))
|
||||
.map { r =>
|
||||
val p = new OrcidParser
|
||||
p.parseWork(r._2)
|
||||
}
|
||||
.filter(p => p != null)
|
||||
.write
|
||||
.mode(SaveMode.Overwrite)
|
||||
.save(s"$targetPath/Works")
|
||||
}
|
||||
|
||||
def extractORCIDEmploymentsTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
|
||||
val sc: SparkContext = spark.sparkContext
|
||||
import spark.implicits._
|
||||
val df = sc
|
||||
.sequenceFile(sourcePath, classOf[Text], classOf[Text])
|
||||
.map { case (x, y) => (x.toString, y.toString) }
|
||||
.toDF
|
||||
.as[(String, String)]
|
||||
implicit val orcidEmploymentAuthor: Encoder[Employment] = Encoders.bean(classOf[Employment])
|
||||
implicit val orcidPID: Encoder[Pid] = Encoders.bean(classOf[Pid])
|
||||
df.filter(r => r._1.contains("employments"))
|
||||
.map { r =>
|
||||
val p = new OrcidParser
|
||||
p.parseEmployment(r._2)
|
||||
}
|
||||
.filter(p => p != null)
|
||||
.write
|
||||
.mode(SaveMode.Overwrite)
|
||||
.save(s"$targetPath/Employments")
|
||||
}
|
||||
}
|
||||
|
||||
object SparkGenerateORCIDTable {
|
||||
|
||||
val log: Logger = LoggerFactory.getLogger(SparkGenerateORCIDTable.getClass)
|
||||
|
||||
def main(args: Array[String]): Unit = {
|
||||
|
||||
new SparkGenerateORCIDTable("/eu/dnetlib/dhp/collection/orcid/generate_orcid_table_parameter.json", args, log)
|
||||
.initialize()
|
||||
.run()
|
||||
|
||||
}
|
||||
}
|
|
@ -166,7 +166,7 @@ object DataciteToOAFTransformation {
|
|||
resourceTypeGeneral: String,
|
||||
schemaOrg: String,
|
||||
vocabularies: VocabularyGroup
|
||||
): (Qualifier, Qualifier) = {
|
||||
): (Qualifier, Qualifier, String) = {
|
||||
if (resourceType != null && resourceType.nonEmpty) {
|
||||
val typeQualifier =
|
||||
vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType)
|
||||
|
@ -176,7 +176,8 @@ object DataciteToOAFTransformation {
|
|||
vocabularies.getSynonymAsQualifier(
|
||||
ModelConstants.DNET_RESULT_TYPOLOGIES,
|
||||
typeQualifier.getClassid
|
||||
)
|
||||
),
|
||||
resourceType
|
||||
)
|
||||
}
|
||||
if (schemaOrg != null && schemaOrg.nonEmpty) {
|
||||
|
@ -188,7 +189,8 @@ object DataciteToOAFTransformation {
|
|||
vocabularies.getSynonymAsQualifier(
|
||||
ModelConstants.DNET_RESULT_TYPOLOGIES,
|
||||
typeQualifier.getClassid
|
||||
)
|
||||
),
|
||||
schemaOrg
|
||||
)
|
||||
|
||||
}
|
||||
|
@ -203,7 +205,8 @@ object DataciteToOAFTransformation {
|
|||
vocabularies.getSynonymAsQualifier(
|
||||
ModelConstants.DNET_RESULT_TYPOLOGIES,
|
||||
typeQualifier.getClassid
|
||||
)
|
||||
),
|
||||
resourceTypeGeneral
|
||||
)
|
||||
|
||||
}
|
||||
|
@ -216,12 +219,18 @@ object DataciteToOAFTransformation {
|
|||
schemaOrg: String,
|
||||
vocabularies: VocabularyGroup
|
||||
): Result = {
|
||||
val typeQualifiers: (Qualifier, Qualifier) =
|
||||
val typeQualifiers: (Qualifier, Qualifier, String) =
|
||||
getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies)
|
||||
if (typeQualifiers == null)
|
||||
return null
|
||||
val i = new Instance
|
||||
i.setInstancetype(typeQualifiers._1)
|
||||
// ADD ORIGINAL TYPE
|
||||
val itm = new InstanceTypeMapping
|
||||
itm.setOriginalType(typeQualifiers._3)
|
||||
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
|
||||
i.setInstanceTypeMapping(List(itm).asJava)
|
||||
|
||||
typeQualifiers._2.getClassname match {
|
||||
case "dataset" =>
|
||||
val r = new OafDataset
|
||||
|
|
|
@ -176,7 +176,7 @@ object BioDBToOAF {
|
|||
i.setUrl(List(s"${resolvedURL(input.pidType)}${input.pid}").asJava)
|
||||
}
|
||||
|
||||
if (input.pidType.equalsIgnoreCase("clinicaltrials.gov"))
|
||||
if (input.pidType.equalsIgnoreCase("clinicaltrials.gov")) {
|
||||
i.setInstancetype(
|
||||
OafMapperUtils.qualifier(
|
||||
"0037",
|
||||
|
@ -185,7 +185,11 @@ object BioDBToOAF {
|
|||
ModelConstants.DNET_PUBLICATION_RESOURCE
|
||||
)
|
||||
)
|
||||
else
|
||||
val itm = new InstanceTypeMapping
|
||||
itm.setOriginalType(input.pidType)
|
||||
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
|
||||
i.setInstanceTypeMapping(List(itm).asJava)
|
||||
} else {
|
||||
i.setInstancetype(
|
||||
OafMapperUtils.qualifier(
|
||||
"0046",
|
||||
|
@ -194,6 +198,11 @@ object BioDBToOAF {
|
|||
ModelConstants.DNET_PUBLICATION_RESOURCE
|
||||
)
|
||||
)
|
||||
val itm = new InstanceTypeMapping
|
||||
itm.setOriginalType("Bioentity")
|
||||
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
|
||||
i.setInstanceTypeMapping(List(itm).asJava)
|
||||
}
|
||||
|
||||
if (input.datasource == null || input.datasource.isEmpty)
|
||||
return null
|
||||
|
@ -265,6 +274,10 @@ object BioDBToOAF {
|
|||
ModelConstants.DNET_PUBLICATION_RESOURCE
|
||||
)
|
||||
)
|
||||
val itm = new InstanceTypeMapping
|
||||
itm.setOriginalType("Bioentity")
|
||||
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
|
||||
i.setInstanceTypeMapping(List(itm).asJava)
|
||||
|
||||
i.setCollectedfrom(collectedFromMap("uniprot"))
|
||||
d.setInstance(List(i).asJava)
|
||||
|
@ -471,6 +484,10 @@ object BioDBToOAF {
|
|||
ModelConstants.DNET_PUBLICATION_RESOURCE
|
||||
)
|
||||
)
|
||||
val itm = new InstanceTypeMapping
|
||||
itm.setOriginalType("Bioentity")
|
||||
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
|
||||
i.setInstanceTypeMapping(List(itm).asJava)
|
||||
|
||||
i.setCollectedfrom(collectedFromMap("pdb"))
|
||||
d.setInstance(List(i).asJava)
|
||||
|
@ -571,6 +588,10 @@ object BioDBToOAF {
|
|||
ModelConstants.DNET_PUBLICATION_RESOURCE
|
||||
)
|
||||
)
|
||||
val itm = new InstanceTypeMapping
|
||||
itm.setOriginalType("Bioentity")
|
||||
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
|
||||
i.setInstanceTypeMapping(List(itm).asJava)
|
||||
|
||||
i.setCollectedfrom(collectedFromMap("ebi"))
|
||||
d.setInstance(List(i).asJava)
|
||||
|
|
|
@ -188,13 +188,24 @@ object PubMedToOaf {
|
|||
val cojbCategory =
|
||||
getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, ja.get.getValue)
|
||||
pubmedInstance.setInstancetype(cojbCategory)
|
||||
// ADD ORIGINAL TYPE to the publication
|
||||
val itm = new InstanceTypeMapping
|
||||
itm.setOriginalType(ja.get.getValue)
|
||||
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
|
||||
pubmedInstance.setInstanceTypeMapping(List(itm).asJava)
|
||||
} else {
|
||||
val i_type = article.getPublicationTypes.asScala
|
||||
.map(s => getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue))
|
||||
.find(q => q != null)
|
||||
if (i_type.isDefined)
|
||||
pubmedInstance.setInstancetype(i_type.get)
|
||||
else
|
||||
.map(s => (s.getValue, getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)))
|
||||
.find(q => q._2 != null)
|
||||
|
||||
if (i_type.isDefined) {
|
||||
pubmedInstance.setInstancetype(i_type.get._2)
|
||||
// ADD ORIGINAL TYPE to the publication
|
||||
val itm = new InstanceTypeMapping
|
||||
itm.setOriginalType(i_type.get._1)
|
||||
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
|
||||
pubmedInstance.setInstanceTypeMapping(List(itm).asJava)
|
||||
} else
|
||||
return null
|
||||
}
|
||||
val result = createResult(pubmedInstance.getInstancetype, vocabularies)
|
||||
|
|
|
@ -79,8 +79,8 @@ public class PrepareAffiliationRelationsTest {
|
|||
.getPath();
|
||||
|
||||
String pubmedAffiliationRelationsPath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror.json")
|
||||
.getPath();
|
||||
.getResource("/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror.json")
|
||||
.getPath();
|
||||
|
||||
String outputPath = workingDir.toString() + "/actionSet";
|
||||
|
||||
|
|
|
@ -0,0 +1,119 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.orcid;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.spark.SparkContext;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.ximpleware.NavException;
|
||||
import com.ximpleware.ParseException;
|
||||
import com.ximpleware.XPathEvalException;
|
||||
import com.ximpleware.XPathParseException;
|
||||
|
||||
import eu.dnetlib.dhp.collection.orcid.model.Author;
|
||||
import eu.dnetlib.dhp.collection.orcid.model.ORCIDItem;
|
||||
import eu.dnetlib.dhp.parser.utility.VtdException;
|
||||
|
||||
public class DownloadORCIDTest {
|
||||
private final Logger log = LoggerFactory.getLogger(DownloadORCIDTest.class);
|
||||
|
||||
@Test
|
||||
public void testSummary() throws Exception {
|
||||
final String xml = IOUtils
|
||||
.toString(
|
||||
Objects.requireNonNull(getClass().getResourceAsStream("/eu/dnetlib/dhp/collection/orcid/summary.xml")));
|
||||
|
||||
final OrcidParser parser = new OrcidParser();
|
||||
ORCIDItem orcidItem = parser.parseSummary(xml);
|
||||
|
||||
final ObjectMapper mapper = new ObjectMapper();
|
||||
System.out.println(mapper.writeValueAsString(orcidItem));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParsingWork() throws Exception {
|
||||
|
||||
final List<String> works_path = Arrays
|
||||
.asList(
|
||||
"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-2536-4498.xml",
|
||||
"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-5982-8983.xml",
|
||||
"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191.xml",
|
||||
"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191-similarity.xml",
|
||||
"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191_contributors.xml"
|
||||
|
||||
);
|
||||
|
||||
final OrcidParser parser = new OrcidParser();
|
||||
final ObjectMapper mapper = new ObjectMapper();
|
||||
works_path.stream().map(s -> {
|
||||
try {
|
||||
return IOUtils
|
||||
.toString(
|
||||
Objects
|
||||
.requireNonNull(
|
||||
getClass()
|
||||
.getResourceAsStream(
|
||||
s)));
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}).forEach(s -> {
|
||||
try {
|
||||
System.out.println(mapper.writeValueAsString(parser.parseWork(s)));
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParsingEmployments() throws Exception {
|
||||
|
||||
final List<String> works_path = Arrays
|
||||
.asList(
|
||||
"/eu/dnetlib/dhp/collection/orcid/employment.xml",
|
||||
"/eu/dnetlib/dhp/collection/orcid/employment_2.xml",
|
||||
"/eu/dnetlib/dhp/collection/orcid/employment_3.xml"
|
||||
|
||||
);
|
||||
|
||||
final OrcidParser parser = new OrcidParser();
|
||||
final ObjectMapper mapper = new ObjectMapper();
|
||||
works_path.stream().map(s -> {
|
||||
try {
|
||||
return IOUtils
|
||||
.toString(
|
||||
Objects
|
||||
.requireNonNull(
|
||||
getClass()
|
||||
.getResourceAsStream(
|
||||
s)));
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}).forEach(s -> {
|
||||
try {
|
||||
System.out.println(mapper.writeValueAsString(parser.parseEmployment(s)));
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<work:work xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history" xmlns:employment="http://www.orcid.org/ns/employment" xmlns:education="http://www.orcid.org/ns/education" xmlns:other-name="http://www.orcid.org/ns/other-name" xmlns:deprecated="http://www.orcid.org/ns/deprecated" xmlns:funding="http://www.orcid.org/ns/funding" xmlns:research-resource="http://www.orcid.org/ns/research-resource" xmlns:service="http://www.orcid.org/ns/service" xmlns:researcher-url="http://www.orcid.org/ns/researcher-url" xmlns:distinction="http://www.orcid.org/ns/distinction" xmlns:internal="http://www.orcid.org/ns/internal" xmlns:membership="http://www.orcid.org/ns/membership" xmlns:person="http://www.orcid.org/ns/person" xmlns:personal-details="http://www.orcid.org/ns/personal-details" xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common" xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword" xmlns:activities="http://www.orcid.org/ns/activities" xmlns:qualification="http://www.orcid.org/ns/qualification" xmlns:external-identifier="http://www.orcid.org/ns/external-identifier" xmlns:error="http://www.orcid.org/ns/error" xmlns:preferences="http://www.orcid.org/ns/preferences" xmlns:invited-position="http://www.orcid.org/ns/invited-position" xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="26448226" path="/0000-0001-5010-5001/work/26448226" visibility="public">
|
||||
<common:created-date>2016-09-01T19:22:46.768Z</common:created-date>
|
||||
<common:last-modified-date>2022-05-25T03:48:56.968Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-client-id>
|
||||
<common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
|
||||
<common:path>0000-0002-5982-8983</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-client-id>
|
||||
<common:source-name>Scopus - Elsevier</common:source-name>
|
||||
<common:assertion-origin-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-5010-5001</common:uri>
|
||||
<common:path>0000-0001-5010-5001</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:assertion-origin-orcid>
|
||||
<common:assertion-origin-name>Quang Nguyen</common:assertion-origin-name>
|
||||
</common:source>
|
||||
<work:title>
|
||||
<common:title>Vision outcomes and major complications after endovascular coil embolization of ophthalmic segment aneurysms</common:title>
|
||||
</work:title>
|
||||
<work:journal-title>American Journal of Neuroradiology</work:journal-title>
|
||||
<work:citation>
|
||||
<work:citation-type>bibtex</work:citation-type>
|
||||
<work:citation-value>@article{Nguyen2014,title = {Vision outcomes and major complications after endovascular coil embolization of ophthalmic segment aneurysms},journal = {American Journal of Neuroradiology},year = {2014},volume = {35},number = {11},pages = {2140-2145},author = {Durst, C. and Starke, R.M. and Gaughen, J. and Nguyen, Q. and Patrie, J. and Jensen, M.E. and Evans, A.J.}}</work:citation-value>
|
||||
</work:citation>
|
||||
<work:type>journal-article</work:type>
|
||||
<common:publication-date>
|
||||
<common:year>2014</common:year>
|
||||
</common:publication-date>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>doi</common:external-id-type>
|
||||
<common:external-id-value>10.3174/ajnr.A4032</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">10.3174/ajnr.a4032</common:external-id-normalized>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
<common:external-id>
|
||||
<common:external-id-type>eid</common:external-id-type>
|
||||
<common:external-id-value>2-s2.0-84911865199</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">2-s2.0-84911865199</common:external-id-normalized>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<common:url>http://www.scopus.com/inward/record.url?eid=2-s2.0-84911865199&partnerID=MN8TOARS</common:url>
|
||||
<work:contributors>
|
||||
<work:contributor>
|
||||
<work:credit-name>Durst, C.</work:credit-name>
|
||||
</work:contributor>
|
||||
<work:contributor>
|
||||
<work:credit-name>Starke, R.M.</work:credit-name>
|
||||
</work:contributor>
|
||||
<work:contributor>
|
||||
<work:credit-name>Gaughen, J.</work:credit-name>
|
||||
</work:contributor>
|
||||
<work:contributor>
|
||||
<work:credit-name>Nguyen, Q.</work:credit-name>
|
||||
</work:contributor>
|
||||
<work:contributor>
|
||||
<work:credit-name>Patrie, J.</work:credit-name>
|
||||
</work:contributor>
|
||||
<work:contributor>
|
||||
<work:credit-name>Jensen, M.E.</work:credit-name>
|
||||
</work:contributor>
|
||||
<work:contributor>
|
||||
<work:credit-name>Evans, A.J.</work:credit-name>
|
||||
</work:contributor>
|
||||
</work:contributors>
|
||||
</work:work>
|
|
@ -0,0 +1,79 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<work:work xmlns:address="http://www.orcid.org/ns/address"
|
||||
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
|
||||
xmlns:employment="http://www.orcid.org/ns/employment"
|
||||
xmlns:education="http://www.orcid.org/ns/education"
|
||||
xmlns:other-name="http://www.orcid.org/ns/other-name"
|
||||
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
|
||||
xmlns:funding="http://www.orcid.org/ns/funding"
|
||||
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
|
||||
xmlns:service="http://www.orcid.org/ns/service"
|
||||
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
|
||||
xmlns:distinction="http://www.orcid.org/ns/distinction"
|
||||
xmlns:internal="http://www.orcid.org/ns/internal"
|
||||
xmlns:membership="http://www.orcid.org/ns/membership"
|
||||
xmlns:person="http://www.orcid.org/ns/person"
|
||||
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
|
||||
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
|
||||
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
|
||||
xmlns:activities="http://www.orcid.org/ns/activities"
|
||||
xmlns:qualification="http://www.orcid.org/ns/qualification"
|
||||
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
|
||||
xmlns:error="http://www.orcid.org/ns/error"
|
||||
xmlns:preferences="http://www.orcid.org/ns/preferences"
|
||||
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
|
||||
xmlns:work="http://www.orcid.org/ns/work"
|
||||
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="50101152"
|
||||
path="/0000-0001-5349-4030/work/50101152" visibility="public">
|
||||
<common:created-date>2018-11-01T19:49:45.562Z</common:created-date>
|
||||
<common:last-modified-date>2018-11-01T19:49:45.562Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-client-id>
|
||||
<common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
|
||||
<common:path>0000-0002-5982-8983</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-client-id>
|
||||
<common:source-name>Scopus - Elsevier</common:source-name>
|
||||
</common:source>
|
||||
<work:title>
|
||||
<common:title>"Calling Out" in class: Degrees of candor in addressing social injustices in
|
||||
racially homogenous and heterogeneous U.S. history classrooms</common:title>
|
||||
</work:title>
|
||||
<work:journal-title>Journal of Social Studies Research</work:journal-title>
|
||||
<work:citation>
|
||||
<work:citation-type>bibtex</work:citation-type>
|
||||
<work:citation-value>@article{Massaro2018,title = {{"}Calling Out{"} in class: Degrees of
|
||||
candor in addressing social injustices in racially homogenous and heterogeneous U.S.
|
||||
history classrooms},journal = {Journal of Social Studies Research},year = {2018},author
|
||||
= {Parkhouse, H. and Massaro, V.R.}}</work:citation-value>
|
||||
</work:citation>
|
||||
<work:type>journal-article</work:type>
|
||||
<common:publication-date>
|
||||
<common:year>2018</common:year>
|
||||
</common:publication-date>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>doi</common:external-id-type>
|
||||
<common:external-id-value>10.1016/j.jssr.2018.01.004</common:external-id-value>
|
||||
<common:external-id-normalized transient="true"
|
||||
>10.1016/j.jssr.2018.01.004</common:external-id-normalized>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
<common:external-id>
|
||||
<common:external-id-type>eid</common:external-id-type>
|
||||
<common:external-id-value>2-s2.0-85041949043</common:external-id-value>
|
||||
<common:external-id-normalized transient="true"
|
||||
>2-s2.0-85041949043</common:external-id-normalized>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<common:url>http://www.scopus.com/inward/record.url?eid=2-s2.0-85041949043&partnerID=MN8TOARS</common:url>
|
||||
<work:contributors>
|
||||
<work:contributor>
|
||||
<work:credit-name>Parkhouse, H.</work:credit-name>
|
||||
</work:contributor>
|
||||
<work:contributor>
|
||||
<work:credit-name>Massaro, V.R.</work:credit-name>
|
||||
</work:contributor>
|
||||
</work:contributors>
|
||||
</work:work>
|
|
@ -0,0 +1,113 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<work:work xmlns:address="http://www.orcid.org/ns/address"
|
||||
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
|
||||
xmlns:employment="http://www.orcid.org/ns/employment"
|
||||
xmlns:education="http://www.orcid.org/ns/education"
|
||||
xmlns:other-name="http://www.orcid.org/ns/other-name"
|
||||
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
|
||||
xmlns:funding="http://www.orcid.org/ns/funding"
|
||||
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
|
||||
xmlns:service="http://www.orcid.org/ns/service"
|
||||
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
|
||||
xmlns:distinction="http://www.orcid.org/ns/distinction"
|
||||
xmlns:internal="http://www.orcid.org/ns/internal"
|
||||
xmlns:membership="http://www.orcid.org/ns/membership"
|
||||
xmlns:person="http://www.orcid.org/ns/person"
|
||||
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
|
||||
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
|
||||
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
|
||||
xmlns:activities="http://www.orcid.org/ns/activities"
|
||||
xmlns:qualification="http://www.orcid.org/ns/qualification"
|
||||
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
|
||||
xmlns:error="http://www.orcid.org/ns/error"
|
||||
xmlns:preferences="http://www.orcid.org/ns/preferences"
|
||||
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
|
||||
xmlns:work="http://www.orcid.org/ns/work"
|
||||
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
|
||||
path="/0000-0003-2760-1191/work/28776099" visibility="public">
|
||||
<common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
|
||||
<common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
|
||||
<common:path>0000-0002-9157-3431</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
<common:source-name>Europe PubMed Central</common:source-name>
|
||||
</common:source>
|
||||
<work:title>
|
||||
<common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
|
||||
Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
|
||||
ST-Segment-Elevation Myocardial Infarction.</common:title>
|
||||
</work:title>
|
||||
<work:citation>
|
||||
<work:citation-type>formatted-unspecified</work:citation-type>
|
||||
<work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
|
||||
Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
|
||||
</work:citation>
|
||||
<work:type>journal-article</work:type>
|
||||
<common:publication-date>
|
||||
<common:year>2016</common:year>
|
||||
<common:month>11</common:month>
|
||||
</common:publication-date>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>pmid</common:external-id-type>
|
||||
<common:external-id-value>27899851</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">27899851</common:external-id-normalized>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
<common:external-id>
|
||||
<common:external-id-type>pmc</common:external-id-type>
|
||||
<common:external-id-value>PMC5126442</common:external-id-value>
|
||||
<common:external-id-normalized transient="true"
|
||||
>PMC5126442</common:external-id-normalized>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<common:url>http://europepmc.org/abstract/med/27899851</common:url>
|
||||
<work:contributors>
|
||||
<work:contributor>
|
||||
<work:credit-name>Abdel-Dayem K</work:credit-name>
|
||||
<work:contributor-attributes>
|
||||
<work:contributor-sequence>first</work:contributor-sequence>
|
||||
<work:contributor-role>author</work:contributor-role>
|
||||
</work:contributor-attributes>
|
||||
</work:contributor>
|
||||
<work:contributor>
|
||||
<work:credit-name>Abdel-Dayem Fake</work:credit-name>
|
||||
<work:contributor-attributes>
|
||||
<work:contributor-sequence>first</work:contributor-sequence>
|
||||
<work:contributor-role>author</work:contributor-role>
|
||||
</work:contributor-attributes>
|
||||
</work:contributor>
|
||||
<work:contributor>
|
||||
<work:credit-name>Eweda II</work:credit-name>
|
||||
<work:contributor-attributes>
|
||||
<work:contributor-sequence>first</work:contributor-sequence>
|
||||
<work:contributor-role>author</work:contributor-role>
|
||||
</work:contributor-attributes>
|
||||
</work:contributor>
|
||||
<work:contributor>
|
||||
<work:credit-name>El-Sherbiny A</work:credit-name>
|
||||
<work:contributor-attributes>
|
||||
<work:contributor-sequence>first</work:contributor-sequence>
|
||||
<work:contributor-role>author</work:contributor-role>
|
||||
</work:contributor-attributes>
|
||||
</work:contributor>
|
||||
<work:contributor>
|
||||
<work:credit-name>Dimitry MO</work:credit-name>
|
||||
<work:contributor-attributes>
|
||||
<work:contributor-sequence>first</work:contributor-sequence>
|
||||
<work:contributor-role>author</work:contributor-role>
|
||||
</work:contributor-attributes>
|
||||
</work:contributor>
|
||||
<work:contributor>
|
||||
<work:credit-name>Nammas W</work:credit-name>
|
||||
<work:contributor-attributes>
|
||||
<work:contributor-sequence>first</work:contributor-sequence>
|
||||
<work:contributor-role>author</work:contributor-role>
|
||||
</work:contributor-attributes>
|
||||
</work:contributor>
|
||||
</work:contributors>
|
||||
</work:work>
|
|
@ -0,0 +1,106 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<work:work xmlns:address="http://www.orcid.org/ns/address"
|
||||
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
|
||||
xmlns:employment="http://www.orcid.org/ns/employment"
|
||||
xmlns:education="http://www.orcid.org/ns/education"
|
||||
xmlns:other-name="http://www.orcid.org/ns/other-name"
|
||||
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
|
||||
xmlns:funding="http://www.orcid.org/ns/funding"
|
||||
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
|
||||
xmlns:service="http://www.orcid.org/ns/service"
|
||||
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
|
||||
xmlns:distinction="http://www.orcid.org/ns/distinction"
|
||||
xmlns:internal="http://www.orcid.org/ns/internal"
|
||||
xmlns:membership="http://www.orcid.org/ns/membership"
|
||||
xmlns:person="http://www.orcid.org/ns/person"
|
||||
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
|
||||
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
|
||||
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
|
||||
xmlns:activities="http://www.orcid.org/ns/activities"
|
||||
xmlns:qualification="http://www.orcid.org/ns/qualification"
|
||||
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
|
||||
xmlns:error="http://www.orcid.org/ns/error"
|
||||
xmlns:preferences="http://www.orcid.org/ns/preferences"
|
||||
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
|
||||
xmlns:work="http://www.orcid.org/ns/work"
|
||||
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
|
||||
path="/0000-0003-2760-1191/work/28776099" visibility="public">
|
||||
<common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
|
||||
<common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
|
||||
<common:path>0000-0002-9157-3431</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
<common:source-name>Europe PubMed Central</common:source-name>
|
||||
</common:source>
|
||||
<work:title>
|
||||
<common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
|
||||
Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
|
||||
ST-Segment-Elevation Myocardial Infarction.</common:title>
|
||||
</work:title>
|
||||
<work:citation>
|
||||
<work:citation-type>formatted-unspecified</work:citation-type>
|
||||
<work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
|
||||
Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
|
||||
</work:citation>
|
||||
<work:type>journal-article</work:type>
|
||||
<common:publication-date>
|
||||
<common:year>2016</common:year>
|
||||
<common:month>11</common:month>
|
||||
</common:publication-date>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>pmid</common:external-id-type>
|
||||
<common:external-id-value>27899851</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">27899851</common:external-id-normalized>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
<common:external-id>
|
||||
<common:external-id-type>pmc</common:external-id-type>
|
||||
<common:external-id-value>PMC5126442</common:external-id-value>
|
||||
<common:external-id-normalized transient="true"
|
||||
>PMC5126442</common:external-id-normalized>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<common:url>http://europepmc.org/abstract/med/27899851</common:url>
|
||||
<work:contributors>
|
||||
<work:contributor>
|
||||
<work:credit-name>Khair Abde Daye</work:credit-name>
|
||||
<work:contributor-attributes>
|
||||
<work:contributor-sequence>first</work:contributor-sequence>
|
||||
<work:contributor-role>author</work:contributor-role>
|
||||
</work:contributor-attributes>
|
||||
</work:contributor>
|
||||
<work:contributor>
|
||||
<work:credit-name>Eweda II</work:credit-name>
|
||||
<work:contributor-attributes>
|
||||
<work:contributor-sequence>first</work:contributor-sequence>
|
||||
<work:contributor-role>author</work:contributor-role>
|
||||
</work:contributor-attributes>
|
||||
</work:contributor>
|
||||
<work:contributor>
|
||||
<work:credit-name>El-Sherbiny A</work:credit-name>
|
||||
<work:contributor-attributes>
|
||||
<work:contributor-sequence>first</work:contributor-sequence>
|
||||
<work:contributor-role>author</work:contributor-role>
|
||||
</work:contributor-attributes>
|
||||
</work:contributor>
|
||||
<work:contributor>
|
||||
<work:credit-name>Dimitry MO</work:credit-name>
|
||||
<work:contributor-attributes>
|
||||
<work:contributor-sequence>first</work:contributor-sequence>
|
||||
<work:contributor-role>author</work:contributor-role>
|
||||
</work:contributor-attributes>
|
||||
</work:contributor>
|
||||
<work:contributor>
|
||||
<work:credit-name>Nammas W</work:credit-name>
|
||||
<work:contributor-attributes>
|
||||
<work:contributor-sequence>first</work:contributor-sequence>
|
||||
<work:contributor-role>author</work:contributor-role>
|
||||
</work:contributor-attributes>
|
||||
</work:contributor>
|
||||
</work:contributors>
|
||||
</work:work>
|
|
@ -0,0 +1,101 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<work:work xmlns:address="http://www.orcid.org/ns/address"
|
||||
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
|
||||
xmlns:employment="http://www.orcid.org/ns/employment"
|
||||
xmlns:education="http://www.orcid.org/ns/education"
|
||||
xmlns:other-name="http://www.orcid.org/ns/other-name"
|
||||
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
|
||||
xmlns:funding="http://www.orcid.org/ns/funding"
|
||||
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
|
||||
xmlns:service="http://www.orcid.org/ns/service"
|
||||
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
|
||||
xmlns:distinction="http://www.orcid.org/ns/distinction"
|
||||
xmlns:internal="http://www.orcid.org/ns/internal"
|
||||
xmlns:membership="http://www.orcid.org/ns/membership"
|
||||
xmlns:person="http://www.orcid.org/ns/person"
|
||||
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
|
||||
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
|
||||
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
|
||||
xmlns:activities="http://www.orcid.org/ns/activities"
|
||||
xmlns:qualification="http://www.orcid.org/ns/qualification"
|
||||
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
|
||||
xmlns:error="http://www.orcid.org/ns/error"
|
||||
xmlns:preferences="http://www.orcid.org/ns/preferences"
|
||||
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
|
||||
xmlns:work="http://www.orcid.org/ns/work"
|
||||
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
|
||||
path="/0000-0003-2760-1191/work/28776099" visibility="public">
|
||||
<common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
|
||||
<common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
|
||||
<common:path>0000-0002-9157-3431</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
<common:source-name>Europe PubMed Central</common:source-name>
|
||||
</common:source>
|
||||
<work:title>
|
||||
<common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
|
||||
Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
|
||||
ST-Segment-Elevation Myocardial Infarction.</common:title>
|
||||
</work:title>
|
||||
<work:citation>
|
||||
<work:citation-type>formatted-unspecified</work:citation-type>
|
||||
<work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
|
||||
Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
|
||||
</work:citation>
|
||||
<work:type>journal-article</work:type>
|
||||
<common:publication-date>
|
||||
<common:year>2016</common:year>
|
||||
<common:month>11</common:month>
|
||||
</common:publication-date>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>pmid</common:external-id-type>
|
||||
<common:external-id-value>27899851</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">27899851</common:external-id-normalized>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
<common:external-id>
|
||||
<common:external-id-type>pmc</common:external-id-type>
|
||||
<common:external-id-value>PMC5126442</common:external-id-value>
|
||||
<common:external-id-normalized transient="true"
|
||||
>PMC5126442</common:external-id-normalized>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<common:url>http://europepmc.org/abstract/med/27899851</common:url>
|
||||
<work:contributors>
|
||||
<work:contributor>
|
||||
<work:contributor-attributes>
|
||||
<work:contributor-sequence>seq0</work:contributor-sequence>
|
||||
<work:contributor-role>role0</work:contributor-role>
|
||||
</work:contributor-attributes>
|
||||
</work:contributor>
|
||||
<work:contributor>
|
||||
<work:credit-name>creditname1</work:credit-name>
|
||||
</work:contributor>
|
||||
<work:contributor>
|
||||
<work:credit-name>creditname2</work:credit-name>
|
||||
<work:contributor-attributes>
|
||||
<work:contributor-sequence>seq2</work:contributor-sequence>
|
||||
<work:contributor-role></work:contributor-role>
|
||||
</work:contributor-attributes>
|
||||
</work:contributor>
|
||||
<work:contributor>
|
||||
<work:credit-name>creditname3</work:credit-name>
|
||||
<work:contributor-attributes>
|
||||
<work:contributor-sequence></work:contributor-sequence>
|
||||
<work:contributor-role>role3</work:contributor-role>
|
||||
</work:contributor-attributes>
|
||||
</work:contributor>
|
||||
<work:contributor>
|
||||
<work:credit-name></work:credit-name>
|
||||
<work:contributor-attributes>
|
||||
<work:contributor-sequence>seq4</work:contributor-sequence>
|
||||
<work:contributor-role>role4</work:contributor-role>
|
||||
</work:contributor-attributes>
|
||||
</work:contributor>
|
||||
</work:contributors>
|
||||
</work:work>
|
|
@ -0,0 +1,50 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
|
||||
xmlns:history="http://www.orcid.org/ns/history"
|
||||
xmlns:employment="http://www.orcid.org/ns/employment"
|
||||
xmlns:education="http://www.orcid.org/ns/education"
|
||||
xmlns:other-name="http://www.orcid.org/ns/other-name"
|
||||
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
|
||||
xmlns:funding="http://www.orcid.org/ns/funding"
|
||||
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
|
||||
xmlns:service="http://www.orcid.org/ns/service"
|
||||
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
|
||||
xmlns:distinction="http://www.orcid.org/ns/distinction"
|
||||
xmlns:internal="http://www.orcid.org/ns/internal"
|
||||
xmlns:membership="http://www.orcid.org/ns/membership"
|
||||
xmlns:person="http://www.orcid.org/ns/person"
|
||||
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
|
||||
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
|
||||
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
|
||||
xmlns:activities="http://www.orcid.org/ns/activities"
|
||||
xmlns:qualification="http://www.orcid.org/ns/qualification"
|
||||
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
|
||||
xmlns:error="http://www.orcid.org/ns/error"
|
||||
xmlns:preferences="http://www.orcid.org/ns/preferences"
|
||||
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
|
||||
xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
|
||||
put-code="2205087" path="/0000-0001-5010-5001/employment/2205087" display-index="0"
|
||||
visibility="public">
|
||||
<common:created-date>2016-09-01T19:21:05.791Z</common:created-date>
|
||||
<common:last-modified-date>2016-09-01T19:21:05.791Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-5010-5001</common:uri>
|
||||
<common:path>0000-0001-5010-5001</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
<common:source-name>Quang Nguyen</common:source-name>
|
||||
</common:source>
|
||||
<common:organization>
|
||||
<common:name>Beth Israel Deaconess Medical Center</common:name>
|
||||
<common:address>
|
||||
<common:city>Boston</common:city>
|
||||
<common:region>MA</common:region>
|
||||
<common:country>US</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>1859</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</employment:employment>
|
|
@ -0,0 +1,55 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
|
||||
xmlns:history="http://www.orcid.org/ns/history"
|
||||
xmlns:employment="http://www.orcid.org/ns/employment"
|
||||
xmlns:education="http://www.orcid.org/ns/education"
|
||||
xmlns:other-name="http://www.orcid.org/ns/other-name"
|
||||
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
|
||||
xmlns:funding="http://www.orcid.org/ns/funding"
|
||||
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
|
||||
xmlns:service="http://www.orcid.org/ns/service"
|
||||
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
|
||||
xmlns:distinction="http://www.orcid.org/ns/distinction"
|
||||
xmlns:internal="http://www.orcid.org/ns/internal"
|
||||
xmlns:membership="http://www.orcid.org/ns/membership"
|
||||
xmlns:person="http://www.orcid.org/ns/person"
|
||||
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
|
||||
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
|
||||
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
|
||||
xmlns:activities="http://www.orcid.org/ns/activities"
|
||||
xmlns:qualification="http://www.orcid.org/ns/qualification"
|
||||
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
|
||||
xmlns:error="http://www.orcid.org/ns/error"
|
||||
xmlns:preferences="http://www.orcid.org/ns/preferences"
|
||||
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
|
||||
xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
|
||||
put-code="6364960" path="/0000-0001-5011-3001/employment/6364960" display-index="1"
|
||||
visibility="public">
|
||||
<common:created-date>2018-09-03T01:46:19.474Z</common:created-date>
|
||||
<common:last-modified-date>2018-09-03T01:46:19.474Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-5011-3001</common:uri>
|
||||
<common:path>0000-0001-5011-3001</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
<common:source-name>zhengyan li</common:source-name>
|
||||
</common:source>
|
||||
<common:start-date>
|
||||
<common:year>2008</common:year>
|
||||
<common:month>09</common:month>
|
||||
<common:day>01</common:day>
|
||||
</common:start-date>
|
||||
<common:organization>
|
||||
<common:name>Anhui Academy of Agricultural Sciences</common:name>
|
||||
<common:address>
|
||||
<common:city>Hefei</common:city>
|
||||
<common:region>Anhui</common:region>
|
||||
<common:country>CN</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>125385</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</employment:employment>
|
|
@ -0,0 +1,62 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
|
||||
xmlns:history="http://www.orcid.org/ns/history"
|
||||
xmlns:employment="http://www.orcid.org/ns/employment"
|
||||
xmlns:education="http://www.orcid.org/ns/education"
|
||||
xmlns:other-name="http://www.orcid.org/ns/other-name"
|
||||
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
|
||||
xmlns:funding="http://www.orcid.org/ns/funding"
|
||||
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
|
||||
xmlns:service="http://www.orcid.org/ns/service"
|
||||
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
|
||||
xmlns:distinction="http://www.orcid.org/ns/distinction"
|
||||
xmlns:internal="http://www.orcid.org/ns/internal"
|
||||
xmlns:membership="http://www.orcid.org/ns/membership"
|
||||
xmlns:person="http://www.orcid.org/ns/person"
|
||||
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
|
||||
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
|
||||
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
|
||||
xmlns:activities="http://www.orcid.org/ns/activities"
|
||||
xmlns:qualification="http://www.orcid.org/ns/qualification"
|
||||
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
|
||||
xmlns:error="http://www.orcid.org/ns/error"
|
||||
xmlns:preferences="http://www.orcid.org/ns/preferences"
|
||||
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
|
||||
xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
|
||||
put-code="7210424" path="/0000-0001-5022-8001/employment/7210424" display-index="1"
|
||||
visibility="public">
|
||||
<common:created-date>2021-03-11T14:48:29.603Z</common:created-date>
|
||||
<common:last-modified-date>2021-03-11T14:48:29.603Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-5012-1001</common:uri>
|
||||
<common:path>0000-0001-5012-1001</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
<common:source-name>Asma Bazzi</common:source-name>
|
||||
</common:source>
|
||||
<common:department-name>Pathology and Laboratory Medicine</common:department-name>
|
||||
<common:role-title>Medical Laboratory Technologist</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>1994</common:year>
|
||||
<common:month>10</common:month>
|
||||
<common:day>01</common:day>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2000</common:year>
|
||||
<common:month>06</common:month>
|
||||
<common:day>30</common:day>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>American University of Beirut</common:name>
|
||||
<common:address>
|
||||
<common:city>Hamra</common:city>
|
||||
<common:region>Beirut</common:region>
|
||||
<common:country>LB</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>11238</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</employment:employment>
|
|
@ -0,0 +1,581 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<record:record xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history" xmlns:employment="http://www.orcid.org/ns/employment" xmlns:education="http://www.orcid.org/ns/education" xmlns:other-name="http://www.orcid.org/ns/other-name" xmlns:deprecated="http://www.orcid.org/ns/deprecated" xmlns:funding="http://www.orcid.org/ns/funding" xmlns:research-resource="http://www.orcid.org/ns/research-resource" xmlns:service="http://www.orcid.org/ns/service" xmlns:researcher-url="http://www.orcid.org/ns/researcher-url" xmlns:distinction="http://www.orcid.org/ns/distinction" xmlns:internal="http://www.orcid.org/ns/internal" xmlns:membership="http://www.orcid.org/ns/membership" xmlns:person="http://www.orcid.org/ns/person" xmlns:personal-details="http://www.orcid.org/ns/personal-details" xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common" xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword" xmlns:activities="http://www.orcid.org/ns/activities" xmlns:qualification="http://www.orcid.org/ns/qualification" xmlns:external-identifier="http://www.orcid.org/ns/external-identifier" xmlns:error="http://www.orcid.org/ns/error" xmlns:preferences="http://www.orcid.org/ns/preferences" xmlns:invited-position="http://www.orcid.org/ns/invited-position" xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review" path="/0000-0001-5045-1000">
|
||||
<common:orcid-identifier>
|
||||
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||
<common:path>0000-0001-5045-1000</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:orcid-identifier>
|
||||
<preferences:preferences>
|
||||
<preferences:locale>es</preferences:locale>
|
||||
</preferences:preferences>
|
||||
<history:history>
|
||||
<history:creation-method>Direct</history:creation-method>
|
||||
<history:submission-date>2023-01-17T23:50:40.215Z</history:submission-date>
|
||||
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
|
||||
<history:claimed>true</history:claimed>
|
||||
<history:verified-email>true</history:verified-email>
|
||||
<history:verified-primary-email>true</history:verified-primary-email>
|
||||
</history:history>
|
||||
<person:person path="/0000-0001-5045-1000/person">
|
||||
<person:name visibility="public" path="0000-0001-5045-1000">
|
||||
<common:created-date>2023-01-17T23:50:40.472Z</common:created-date>
|
||||
<common:last-modified-date>2023-01-17T23:50:40.472Z</common:last-modified-date>
|
||||
<personal-details:given-names>Patricio</personal-details:given-names>
|
||||
<personal-details:family-name>Sánchez Quinchuela</personal-details:family-name>
|
||||
</person:name>
|
||||
<other-name:other-names path="/0000-0001-5045-1000/other-names"/>
|
||||
<person:biography visibility="public" path="/0000-0001-5045-1000/biography">
|
||||
<common:created-date>2023-01-19T13:47:33.653Z</common:created-date>
|
||||
<common:last-modified-date>2023-01-19T13:47:33.653Z</common:last-modified-date>
|
||||
<personal-details:content>Especialista de vinculación con la sociedad y docente de la Universidad de las Artes. Magister en Economía Social y Solidaria por el IAEN; Magister en Proyectos Sociales y Productivos por la UNACH. Licenciado en Artes UCE. Licenciado en Castellano y Literatura por la UNACH. Doctorando del programa de Sociología de la UNED España. Larga trayectoria vinculado a las organizaciones sociales acompañando procesos de gestión cultural, formación de liderazgos y economía solidaria.</personal-details:content>
|
||||
</person:biography>
|
||||
<researcher-url:researcher-urls path="/0000-0001-5045-1000/researcher-urls"/>
|
||||
<email:emails path="/0000-0001-5045-1000/email"/>
|
||||
<address:addresses path="/0000-0001-5045-1000/address"/>
|
||||
<keyword:keywords path="/0000-0001-5045-1000/keywords"/>
|
||||
<external-identifier:external-identifiers path="/0000-0001-7291-3210/external-identifiers">
|
||||
<common:last-modified-date>2018-02-05T23:27:36.636Z</common:last-modified-date>
|
||||
<external-identifier:external-identifier put-code="134902" visibility="public" path="/0000-0001-7291-3210/external-identifiers/134902" display-index="1">
|
||||
<common:created-date>2013-03-08T03:20:39.347Z</common:created-date>
|
||||
<common:last-modified-date>2018-02-05T23:27:36.636Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-client-id>
|
||||
<common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
|
||||
<common:path>0000-0002-5982-8983</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-client-id>
|
||||
<common:source-name>Scopus - Elsevier</common:source-name>
|
||||
<common:assertion-origin-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-7291-3210</common:uri>
|
||||
<common:path>0000-0001-7291-3210</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:assertion-origin-orcid>
|
||||
<common:assertion-origin-name>Paolo Manghi</common:assertion-origin-name>
|
||||
</common:source>
|
||||
<common:external-id-type>Scopus Author ID</common:external-id-type>
|
||||
<common:external-id-value>6602255248</common:external-id-value>
|
||||
<common:external-id-url>http://www.scopus.com/inward/authorDetails.url?authorID=6602255248&partnerID=MN8TOARS</common:external-id-url>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</external-identifier:external-identifier>
|
||||
</external-identifier:external-identifiers>
|
||||
</person:person>
|
||||
<activities:activities-summary path="/0000-0001-5045-1000/activities">
|
||||
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
|
||||
<activities:distinctions path="/0000-0001-5045-1000/distinctions">
|
||||
<common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<distinction:distinction-summary put-code="19395146" display-index="1" path="/0000-0001-5045-1000/distinction/19395146" visibility="public">
|
||||
<common:created-date>2023-01-19T13:49:48.482Z</common:created-date>
|
||||
<common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||
<common:path>0000-0001-5045-1000</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||
</common:source>
|
||||
<common:department-name>Programa de Maestría</common:department-name>
|
||||
<common:role-title>Becario del programa de Maestría en Economía Social y Solidaria</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>2014</common:year>
|
||||
<common:month>10</common:month>
|
||||
<common:day>20</common:day>
|
||||
</common:start-date>
|
||||
<common:organization>
|
||||
<common:name>Instituto de Altos Estudios Nacionales</common:name>
|
||||
<common:address>
|
||||
<common:city>Quito</common:city>
|
||||
<common:region>Pichincha</common:region>
|
||||
<common:country>EC</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>https://ror.org/011g3me54</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>ROR</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</distinction:distinction-summary>
|
||||
</activities:affiliation-group>
|
||||
</activities:distinctions>
|
||||
<activities:educations path="/0000-0001-5045-1000/educations">
|
||||
<common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<education:education-summary put-code="19389331" display-index="1" path="/0000-0001-5045-1000/education/19389331" visibility="public">
|
||||
<common:created-date>2023-01-18T21:41:03.175Z</common:created-date>
|
||||
<common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||
<common:path>0000-0001-5045-1000</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||
</common:source>
|
||||
<common:department-name>Programa de Doctorado en Sociología</common:department-name>
|
||||
<common:role-title>Doctorando del Programa de Sociología</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>2020</common:year>
|
||||
<common:month>11</common:month>
|
||||
<common:day>06</common:day>
|
||||
</common:start-date>
|
||||
<common:organization>
|
||||
<common:name>Universidad Nacional de Educación a Distancia Facultad de Ciencias Políticas y Sociología</common:name>
|
||||
<common:address>
|
||||
<common:city>Madrid</common:city>
|
||||
<common:region>Comunidad de Madrid</common:region>
|
||||
<common:country>ES</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>223339</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</education:education-summary>
|
||||
</activities:affiliation-group>
|
||||
</activities:educations>
|
||||
<activities:employments path="/0000-0001-5045-1000/employments">
|
||||
<common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2023-01-18T21:22:21.513Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<employment:employment-summary put-code="19379757" display-index="1" path="/0000-0001-5045-1000/employment/19379757" visibility="public">
|
||||
<common:created-date>2023-01-17T23:57:08.246Z</common:created-date>
|
||||
<common:last-modified-date>2023-01-18T21:22:21.513Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||
<common:path>0000-0001-5045-1000</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||
</common:source>
|
||||
<common:department-name>Dirección de Vinculación con la Sociedad</common:department-name>
|
||||
<common:role-title>Especialista de Proyectos y docente</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>2021</common:year>
|
||||
<common:month>11</common:month>
|
||||
<common:day>01</common:day>
|
||||
</common:start-date>
|
||||
<common:organization>
|
||||
<common:name>Universidad de las Artes</common:name>
|
||||
<common:address>
|
||||
<common:city>Guayaquil</common:city>
|
||||
<common:region>Guayas</common:region>
|
||||
<common:country>EC</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>https://ror.org/016drwn73</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>ROR</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</employment:employment-summary>
|
||||
</activities:affiliation-group>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<employment:employment-summary put-code="19389234" display-index="1" path="/0000-0001-5045-1000/employment/19389234" visibility="public">
|
||||
<common:created-date>2023-01-18T21:25:07.138Z</common:created-date>
|
||||
<common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||
<common:path>0000-0001-5045-1000</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||
</common:source>
|
||||
<common:department-name>Dirección de Vinculación con la Sociedad</common:department-name>
|
||||
<common:role-title>Director</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>2019</common:year>
|
||||
<common:month>11</common:month>
|
||||
<common:day>05</common:day>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2021</common:year>
|
||||
<common:month>10</common:month>
|
||||
<common:day>31</common:day>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>Universidad Regional Amazónica IKIAM</common:name>
|
||||
<common:address>
|
||||
<common:city>Tena</common:city>
|
||||
<common:region>Napo</common:region>
|
||||
<common:country>EC</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>https://ror.org/05xedqd83</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>ROR</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
<common:url>http://ikiam.edu.ec</common:url>
|
||||
</employment:employment-summary>
|
||||
</activities:affiliation-group>
|
||||
</activities:employments>
|
||||
<activities:fundings path="/0000-0001-5045-1000/fundings"/>
|
||||
<activities:invited-positions path="/0000-0001-5045-1000/invited-positions"/>
|
||||
<activities:memberships path="/0000-0001-5045-1000/memberships">
|
||||
<common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<membership:membership-summary put-code="19927715" display-index="1" path="/0000-0001-5045-1000/membership/19927715" visibility="public">
|
||||
<common:created-date>2023-03-24T18:16:09.131Z</common:created-date>
|
||||
<common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||
<common:path>0000-0001-5045-1000</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||
</common:source>
|
||||
<common:department-name>Artes Escénicas</common:department-name>
|
||||
<common:role-title>Miembro</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>2000</common:year>
|
||||
<common:month>07</common:month>
|
||||
<common:day>15</common:day>
|
||||
</common:start-date>
|
||||
<common:organization>
|
||||
<common:name>Casa de la Cultura Ecuatoriana</common:name>
|
||||
<common:address>
|
||||
<common:city>Riobamba</common:city>
|
||||
<common:region>Sierra Centro</common:region>
|
||||
<common:country>EC</common:country>
|
||||
</common:address>
|
||||
</common:organization>
|
||||
</membership:membership-summary>
|
||||
</activities:affiliation-group>
|
||||
</activities:memberships>
|
||||
<activities:peer-reviews path="/0000-0001-5045-1000/peer-reviews"/>
|
||||
<activities:qualifications path="/0000-0001-5045-1000/qualifications">
|
||||
<common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2023-01-18T21:29:11.300Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<qualification:qualification-summary put-code="19389264" display-index="1" path="/0000-0001-5045-1000/qualification/19389264" visibility="public">
|
||||
<common:created-date>2023-01-18T21:29:11.300Z</common:created-date>
|
||||
<common:last-modified-date>2023-01-18T21:29:11.300Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||
<common:path>0000-0001-5045-1000</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||
</common:source>
|
||||
<common:department-name>Programa de Gobernabilidad</common:department-name>
|
||||
<common:role-title>Magister en Economïa Social y Solidaria</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>2014</common:year>
|
||||
<common:month>10</common:month>
|
||||
<common:day>20</common:day>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2017</common:year>
|
||||
<common:month>01</common:month>
|
||||
<common:day>26</common:day>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>Instituto de Altos Estudios Nacionales</common:name>
|
||||
<common:address>
|
||||
<common:city>Quito</common:city>
|
||||
<common:region>Pichincha</common:region>
|
||||
<common:country>EC</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>https://ror.org/011g3me54</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>ROR</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</qualification:qualification-summary>
|
||||
</activities:affiliation-group>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2023-01-18T21:34:32.093Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<qualification:qualification-summary put-code="19389298" display-index="1" path="/0000-0001-5045-1000/qualification/19389298" visibility="public">
|
||||
<common:created-date>2023-01-18T21:34:32.093Z</common:created-date>
|
||||
<common:last-modified-date>2023-01-18T21:34:32.093Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||
<common:path>0000-0001-5045-1000</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||
</common:source>
|
||||
<common:department-name>Posgrados</common:department-name>
|
||||
<common:role-title>Magister en Proyectos Sociales y Productivos</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>2001</common:year>
|
||||
<common:month>03</common:month>
|
||||
<common:day>09</common:day>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2003</common:year>
|
||||
<common:month>02</common:month>
|
||||
<common:day>27</common:day>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>Universidad Nacional de Chimborazo</common:name>
|
||||
<common:address>
|
||||
<common:city>Riobamba</common:city>
|
||||
<common:region>Chimborazo</common:region>
|
||||
<common:country>EC</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>https://ror.org/059wmd288</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>ROR</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</qualification:qualification-summary>
|
||||
</activities:affiliation-group>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<qualification:qualification-summary put-code="19389353" display-index="1" path="/0000-0001-5045-1000/qualification/19389353" visibility="public">
|
||||
<common:created-date>2023-01-18T21:45:07.379Z</common:created-date>
|
||||
<common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||
<common:path>0000-0001-5045-1000</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||
</common:source>
|
||||
<common:department-name>Ciencias de la Educación</common:department-name>
|
||||
<common:role-title>Licenciado en Ciencias de la Educación en Castellano y Literatura</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>1994</common:year>
|
||||
<common:month>10</common:month>
|
||||
<common:day>03</common:day>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2000</common:year>
|
||||
<common:month>01</common:month>
|
||||
<common:day>31</common:day>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>Universidad Nacional de Chimborazo</common:name>
|
||||
<common:address>
|
||||
<common:city>Riobamba</common:city>
|
||||
<common:region>Chimborazo</common:region>
|
||||
<common:country>EC</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>https://ror.org/059wmd288</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>ROR</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</qualification:qualification-summary>
|
||||
</activities:affiliation-group>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2023-01-18T21:37:42.186Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<qualification:qualification-summary put-code="19389317" display-index="1" path="/0000-0001-5045-1000/qualification/19389317" visibility="public">
|
||||
<common:created-date>2023-01-18T21:37:42.186Z</common:created-date>
|
||||
<common:last-modified-date>2023-01-18T21:37:42.186Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||
<common:path>0000-0001-5045-1000</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||
</common:source>
|
||||
<common:department-name>Facultad de Artes</common:department-name>
|
||||
<common:role-title>Licenciado en Artes</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>1989</common:year>
|
||||
<common:month>09</common:month>
|
||||
<common:day>05</common:day>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>1997</common:year>
|
||||
<common:month>08</common:month>
|
||||
<common:day>07</common:day>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>Universidad Central del Ecuador</common:name>
|
||||
<common:address>
|
||||
<common:city>Quito</common:city>
|
||||
<common:region>Pichincha</common:region>
|
||||
<common:country>EC</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>http://dx.doi.org/10.13039/100019134</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>FUNDREF</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</qualification:qualification-summary>
|
||||
</activities:affiliation-group>
|
||||
</activities:qualifications>
|
||||
<activities:research-resources path="/0000-0001-5045-1000/research-resources"/>
|
||||
<activities:services path="/0000-0001-5045-1000/services"/>
|
||||
<activities:works path="/0000-0001-5045-1000/works">
|
||||
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
|
||||
<activities:group>
|
||||
<common:last-modified-date>2023-06-09T22:15:12.910Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<work:work-summary put-code="131526645" path="/0000-0001-5045-1000/work/131526645" visibility="public" display-index="1">
|
||||
<common:created-date>2023-03-24T18:36:56.180Z</common:created-date>
|
||||
<common:last-modified-date>2023-06-09T22:15:12.910Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||
<common:path>0000-0001-5045-1000</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||
</common:source>
|
||||
<work:title>
|
||||
<common:title>Experience in a non-capitalist way: solidarity funds that do not tax interest on the use of money</common:title>
|
||||
</work:title>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>isbn</common:external-id-type>
|
||||
<common:external-id-value>978-9942-29-089-2</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">9789942290892</common:external-id-normalized>
|
||||
<common:external-id-relationship>part-of</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<work:type>book-chapter</work:type>
|
||||
<common:publication-date>
|
||||
<common:year>2023</common:year>
|
||||
<common:month>06</common:month>
|
||||
<common:day>07</common:day>
|
||||
</common:publication-date>
|
||||
<work:journal-title>Finanzas éticas y solidarias en América Latina: diagnósticos, debates y propuestas</work:journal-title>
|
||||
</work:work-summary>
|
||||
</activities:group>
|
||||
<activities:group>
|
||||
<common:last-modified-date>2023-03-24T19:05:36.384Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<work:work-summary put-code="131527819" path="/0000-0001-5045-1000/work/131527819" visibility="public" display-index="1">
|
||||
<common:created-date>2023-03-24T19:05:36.384Z</common:created-date>
|
||||
<common:last-modified-date>2023-03-24T19:05:36.384Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||
<common:path>0000-0001-5045-1000</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||
</common:source>
|
||||
<work:title>
|
||||
<common:title>Incidence of artistic practices in the social transformation of the territory. study of case: Hilarte Association, Guayaquil-Ecuador</common:title>
|
||||
</work:title>
|
||||
<common:external-ids/>
|
||||
<work:type>conference-abstract</work:type>
|
||||
<common:publication-date>
|
||||
<common:year>2022</common:year>
|
||||
<common:month>10</common:month>
|
||||
<common:day>06</common:day>
|
||||
</common:publication-date>
|
||||
</work:work-summary>
|
||||
</activities:group>
|
||||
<activities:group>
|
||||
<common:last-modified-date>2023-09-04T17:40:30.215Z</common:last-modified-date>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>other-id</common:external-id-type>
|
||||
<common:external-id-value>2018</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">2018</common:external-id-normalized>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<work:work-summary put-code="141716337" path="/0000-0001-5045-1000/work/141716337" visibility="public" display-index="1">
|
||||
<common:created-date>2023-09-04T17:40:30.215Z</common:created-date>
|
||||
<common:last-modified-date>2023-09-04T17:40:30.215Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||
<common:path>0000-0001-5045-1000</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||
</common:source>
|
||||
<work:title>
|
||||
<common:title>Más allá de la transferencia de conocimientos, un espacio para el interaprendizaje y el diálogo de saberes</common:title>
|
||||
</work:title>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>other-id</common:external-id-type>
|
||||
<common:external-id-value>2018</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">2018</common:external-id-normalized>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<common:url>https://drive.google.com/drive/folders/1Tclz6isxGzSjTq-hfTnxe6M1nux-88wF?usp=drive_link</common:url>
|
||||
<work:type>conference-poster</work:type>
|
||||
<common:publication-date>
|
||||
<common:year>2018</common:year>
|
||||
<common:month>11</common:month>
|
||||
<common:day>30</common:day>
|
||||
</common:publication-date>
|
||||
<work:journal-title>Más allá de la transferencia de conocimientos, un espacio para el interaprendizaje y el diálogo de saberes</work:journal-title>
|
||||
</work:work-summary>
|
||||
</activities:group>
|
||||
<activities:group>
|
||||
<common:last-modified-date>2023-03-24T18:57:10.095Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<work:work-summary put-code="131527433" path="/0000-0001-5045-1000/work/131527433" visibility="public" display-index="1">
|
||||
<common:created-date>2023-03-24T18:57:10.095Z</common:created-date>
|
||||
<common:last-modified-date>2023-03-24T18:57:10.095Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||
<common:path>0000-0001-5045-1000</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||
</common:source>
|
||||
<work:title>
|
||||
<common:title>Promotion of the popular and solidarity economy from the state: principles and challenges in the experience of Ecuador</common:title>
|
||||
</work:title>
|
||||
<common:external-ids/>
|
||||
<work:type>dissertation-thesis</work:type>
|
||||
<common:publication-date>
|
||||
<common:year>2017</common:year>
|
||||
<common:month>01</common:month>
|
||||
<common:day>26</common:day>
|
||||
</common:publication-date>
|
||||
</work:work-summary>
|
||||
</activities:group>
|
||||
<activities:group>
|
||||
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<work:work-summary put-code="141716713" path="/0000-0001-5045-1000/work/141716713" visibility="public" display-index="1">
|
||||
<common:created-date>2023-09-04T17:51:57.749Z</common:created-date>
|
||||
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
|
||||
<common:path>0000-0001-5045-1000</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
|
||||
</common:source>
|
||||
<work:title>
|
||||
<common:title>La Rebelión de los Dioses</common:title>
|
||||
</work:title>
|
||||
<common:external-ids/>
|
||||
<common:url>https://drive.google.com/drive/folders/1Tclz6isxGzSjTq-hfTnxe6M1nux-88wF?usp=drive_link</common:url>
|
||||
<work:type>registered-copyright</work:type>
|
||||
<common:publication-date>
|
||||
<common:year>2001</common:year>
|
||||
<common:month>08</common:month>
|
||||
<common:day>28</common:day>
|
||||
</common:publication-date>
|
||||
<work:journal-title>Editorial pedagógica freire</work:journal-title>
|
||||
</work:work-summary>
|
||||
</activities:group>
|
||||
</activities:works>
|
||||
</activities:activities-summary>
|
||||
</record:record>
|
|
@ -16,6 +16,10 @@
|
|||
<name>filterInvisible</name>
|
||||
<description>whether filter out invisible entities after merge</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>isLookupUrl</name>
|
||||
<description>the URL address of the lookUp service</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>heap memory for driver process</description>
|
||||
|
@ -128,6 +132,7 @@
|
|||
<arg>--graphInputPath</arg><arg>${graphBasePath}</arg>
|
||||
<arg>--checkpointPath</arg><arg>${workingPath}/grouped_entities</arg>
|
||||
<arg>--outputPath</arg><arg>${graphOutputPath}</arg>
|
||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||
<arg>--filterInvisible</arg><arg>${filterInvisible}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
|
|
|
@ -133,32 +133,6 @@
|
|||
<arg>--targetPath</arg><arg>${inputPathMAG}/dataset</arg>
|
||||
<arg>--master</arg><arg>yarn-cluster</arg>
|
||||
</spark>
|
||||
<ok to="PreProcessORCID"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<!-- ORCID SECTION -->
|
||||
<action name="PreProcessORCID">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn-cluster</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Convert ORCID to Dataset</name>
|
||||
<class>eu.dnetlib.doiboost.orcid.SparkPreprocessORCID</class>
|
||||
<jar>dhp-doiboost-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${inputPathOrcid}</arg>
|
||||
<arg>--workingPath</arg><arg>${workingPathOrcid}</arg>
|
||||
<arg>--master</arg><arg>yarn-cluster</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
|
|
@ -59,10 +59,10 @@
|
|||
</property>
|
||||
|
||||
<!-- ORCID Parameters -->
|
||||
<property>
|
||||
<name>workingPathOrcid</name>
|
||||
<description>the ORCID working path</description>
|
||||
</property>
|
||||
<!-- <property>-->
|
||||
<!-- <name>workingPathOrcid</name>-->
|
||||
<!-- <description>the ORCID working path</description>-->
|
||||
<!-- </property>-->
|
||||
|
||||
</parameters>
|
||||
|
||||
|
@ -84,7 +84,6 @@
|
|||
<case to="End">${wf:conf('resumeFrom') eq 'Skip'}</case>
|
||||
<case to="ProcessMAG">${wf:conf('resumeFrom') eq 'PreprocessMag'}</case>
|
||||
<case to="ProcessUW">${wf:conf('resumeFrom') eq 'PreprocessUW'}</case>
|
||||
<case to="ProcessORCID">${wf:conf('resumeFrom') eq 'ProcessORCID'}</case>
|
||||
<case to="CreateDOIBoost">${wf:conf('resumeFrom') eq 'CreateDOIBoost'}</case>
|
||||
<case to="GenerateActionSet">${wf:conf('resumeFrom') eq 'GenerateActionSet'}</case>
|
||||
<default to="ConvertCrossrefToOAF"/>
|
||||
|
@ -170,32 +169,6 @@
|
|||
<arg>--targetPath</arg><arg>${workingPath}/uwPublication</arg>
|
||||
<arg>--master</arg><arg>yarn-cluster</arg>
|
||||
</spark>
|
||||
<ok to="ProcessORCID"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<!-- ORCID SECTION -->
|
||||
<action name="ProcessORCID">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn-cluster</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Convert ORCID to Dataset</name>
|
||||
<class>eu.dnetlib.doiboost.orcid.SparkConvertORCIDToOAF</class>
|
||||
<jar>dhp-doiboost-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--workingPath</arg><arg>${workingPathOrcid}</arg>
|
||||
<arg>--targetPath</arg><arg>${workingPath}/orcidPublication</arg>
|
||||
<arg>--master</arg><arg>yarn-cluster</arg>
|
||||
</spark>
|
||||
<ok to="CreateDOIBoost"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
|
|
@ -66,7 +66,7 @@ object SparkGenerateDoiBoost {
|
|||
Encoders.tuple(Encoders.STRING, mapEncoderPub)
|
||||
implicit val mapEncoderRel: Encoder[Relation] = Encoders.kryo[Relation]
|
||||
|
||||
logger.info("Phase 2) Join Crossref with UnpayWall")
|
||||
logger.info("Phase 1) Join Crossref with UnpayWall")
|
||||
|
||||
val crossrefPublication: Dataset[(String, Publication)] =
|
||||
spark.read.load(s"$workingDirPath/crossrefPublication").as[Publication].map(p => (p.getId, p))
|
||||
|
@ -91,20 +91,10 @@ object SparkGenerateDoiBoost {
|
|||
.write
|
||||
.mode(SaveMode.Overwrite)
|
||||
.save(s"$workingDirPath/firstJoin")
|
||||
logger.info("Phase 3) Join Result with ORCID")
|
||||
val fj: Dataset[(String, Publication)] =
|
||||
spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p))
|
||||
val orcidPublication: Dataset[(String, Publication)] =
|
||||
spark.read.load(s"$workingDirPath/orcidPublication").as[Publication].map(p => (p.getId, p))
|
||||
fj.joinWith(orcidPublication, fj("_1").equalTo(orcidPublication("_1")), "left")
|
||||
.map(applyMerge)
|
||||
.write
|
||||
.mode(SaveMode.Overwrite)
|
||||
.save(s"$workingDirPath/secondJoin")
|
||||
|
||||
logger.info("Phase 4) Join Result with MAG")
|
||||
logger.info("Phase 2) Join Result with MAG")
|
||||
val sj: Dataset[(String, Publication)] =
|
||||
spark.read.load(s"$workingDirPath/secondJoin").as[Publication].map(p => (p.getId, p))
|
||||
spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p))
|
||||
|
||||
val magPublication: Dataset[(String, Publication)] =
|
||||
spark.read.load(s"$workingDirPath/magPublication").as[Publication].map(p => (p.getId, p))
|
||||
|
|
|
@ -107,7 +107,7 @@ case object Crossref2Oaf {
|
|||
.map(f => f.id)
|
||||
}
|
||||
|
||||
def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = {
|
||||
def mappingResult(result: Result, json: JValue, cobjCategory: String, originalType: String): Result = {
|
||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||
|
||||
//MAPPING Crossref DOI into PID
|
||||
|
@ -283,6 +283,11 @@ case object Crossref2Oaf {
|
|||
ModelConstants.DNET_PUBLICATION_RESOURCE
|
||||
)
|
||||
)
|
||||
//ADD ORIGINAL TYPE to the mapping
|
||||
val itm = new InstanceTypeMapping
|
||||
itm.setOriginalType(originalType)
|
||||
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
|
||||
instance.setInstanceTypeMapping(List(itm).asJava)
|
||||
result.setResourcetype(
|
||||
OafMapperUtils.qualifier(
|
||||
cobjCategory.substring(0, 4),
|
||||
|
@ -367,7 +372,9 @@ case object Crossref2Oaf {
|
|||
objectType,
|
||||
mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type")
|
||||
)
|
||||
mappingResult(result, json, cOBJCategory)
|
||||
|
||||
val originalType = if (mappingCrossrefSubType.contains(objectType)) objectType else objectSubType
|
||||
mappingResult(result, json, cOBJCategory, originalType)
|
||||
if (result == null || result.getId == null)
|
||||
return List()
|
||||
|
||||
|
|
|
@ -71,6 +71,9 @@ public class PropagationConstant {
|
|||
public static final String PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_ID = "result:community:organization";
|
||||
public static final String PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME = " Propagation of result belonging to community through organization";
|
||||
|
||||
public static final String PROPAGATION_RESULT_COMMUNITY_PROJECT_CLASS_ID = "result:community:project";
|
||||
public static final String PROPAGATION_RESULT_COMMUNITY_PROJECT_CLASS_NAME = " Propagation of result belonging to community through project";
|
||||
|
||||
public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID = "authorpid:result";
|
||||
public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME = "Propagation of authors pid to result through semantic relations";
|
||||
|
||||
|
|
|
@ -0,0 +1,80 @@
|
|||
|
||||
package eu.dnetlib.dhp.api;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.URL;
|
||||
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 06/10/23
|
||||
*/
|
||||
public class QueryCommunityAPI {
|
||||
|
||||
private static String get(String geturl) throws IOException {
|
||||
URL url = new URL(geturl);
|
||||
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
||||
conn.setDoOutput(true);
|
||||
conn.setRequestMethod("GET");
|
||||
|
||||
int responseCode = conn.getResponseCode();
|
||||
String body = getBody(conn);
|
||||
conn.disconnect();
|
||||
if (responseCode != HttpURLConnection.HTTP_OK)
|
||||
throw new IOException("Unexpected code " + responseCode + body);
|
||||
|
||||
return body;
|
||||
}
|
||||
|
||||
public static String communities(String baseURL) throws IOException {
|
||||
|
||||
return get(baseURL + "communities");
|
||||
|
||||
}
|
||||
|
||||
public static String community(String id, String baseURL) throws IOException {
|
||||
|
||||
return get(baseURL + id);
|
||||
|
||||
}
|
||||
|
||||
public static String communityDatasource(String id, String baseURL) throws IOException {
|
||||
|
||||
return get(baseURL + id + "/contentproviders");
|
||||
|
||||
}
|
||||
|
||||
public static String communityPropagationOrganization(String id, String baseURL) throws IOException {
|
||||
|
||||
return get(baseURL + id + "/propagationOrganizations");
|
||||
|
||||
}
|
||||
|
||||
public static String communityProjects(String id, String page, String size, String baseURL) throws IOException {
|
||||
|
||||
return get(baseURL + id + "/projects/" + page + "/" + size);
|
||||
|
||||
}
|
||||
|
||||
@NotNull
|
||||
private static String getBody(HttpURLConnection conn) throws IOException {
|
||||
String body = "{}";
|
||||
try (BufferedReader br = new BufferedReader(
|
||||
new InputStreamReader(conn.getInputStream(), "utf-8"))) {
|
||||
StringBuilder response = new StringBuilder();
|
||||
String responseLine = null;
|
||||
while ((responseLine = br.readLine()) != null) {
|
||||
response.append(responseLine.trim());
|
||||
}
|
||||
|
||||
body = response.toString();
|
||||
|
||||
}
|
||||
return body;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,170 @@
|
|||
|
||||
package eu.dnetlib.dhp.api;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import javax.management.Query;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.amazonaws.util.StringUtils;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.common.collect.Maps;
|
||||
|
||||
import eu.dnetlib.dhp.api.model.*;
|
||||
import eu.dnetlib.dhp.bulktag.community.Community;
|
||||
import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
|
||||
import eu.dnetlib.dhp.bulktag.community.Provider;
|
||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
|
||||
import eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 09/10/23
|
||||
*/
|
||||
public class Utils implements Serializable {
|
||||
private static final ObjectMapper MAPPER = new ObjectMapper();
|
||||
private static final VerbResolver resolver = VerbResolverFactory.newInstance();
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(Utils.class);
|
||||
|
||||
public static CommunityConfiguration getCommunityConfiguration(String baseURL) throws IOException {
|
||||
final Map<String, Community> communities = Maps.newHashMap();
|
||||
List<Community> validCommunities = new ArrayList<>();
|
||||
getValidCommunities(baseURL)
|
||||
.forEach(community -> {
|
||||
try {
|
||||
CommunityModel cm = MAPPER
|
||||
.readValue(QueryCommunityAPI.community(community.getId(), baseURL), CommunityModel.class);
|
||||
validCommunities.add(getCommunity(cm));
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
validCommunities.forEach(community -> {
|
||||
try {
|
||||
DatasourceList dl = MAPPER
|
||||
.readValue(
|
||||
QueryCommunityAPI.communityDatasource(community.getId(), baseURL), DatasourceList.class);
|
||||
community.setProviders(dl.stream().map(d -> {
|
||||
if (d.getEnabled() == null || Boolean.FALSE.equals(d.getEnabled()))
|
||||
return null;
|
||||
Provider p = new Provider();
|
||||
p.setOpenaireId("10|" + d.getOpenaireId());
|
||||
p.setSelectionConstraints(d.getSelectioncriteria());
|
||||
if (p.getSelectionConstraints() != null)
|
||||
p.getSelectionConstraints().setSelection(resolver);
|
||||
return p;
|
||||
})
|
||||
.filter(Objects::nonNull)
|
||||
.collect(Collectors.toList()));
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
|
||||
validCommunities.forEach(community -> {
|
||||
if (community.isValid())
|
||||
communities.put(community.getId(), community);
|
||||
});
|
||||
return new CommunityConfiguration(communities);
|
||||
}
|
||||
|
||||
private static Community getCommunity(CommunityModel cm) {
|
||||
Community c = new Community();
|
||||
c.setId(cm.getId());
|
||||
c.setZenodoCommunities(cm.getOtherZenodoCommunities());
|
||||
if (!StringUtils.isNullOrEmpty(cm.getZenodoCommunity()))
|
||||
c.getZenodoCommunities().add(cm.getZenodoCommunity());
|
||||
c.setSubjects(cm.getSubjects());
|
||||
c.getSubjects().addAll(cm.getFos());
|
||||
c.getSubjects().addAll(cm.getSdg());
|
||||
if (cm.getAdvancedConstraints() != null) {
|
||||
c.setConstraints(cm.getAdvancedConstraints());
|
||||
c.getConstraints().setSelection(resolver);
|
||||
}
|
||||
if (cm.getRemoveConstraints() != null) {
|
||||
c.setRemoveConstraints(cm.getRemoveConstraints());
|
||||
c.getRemoveConstraints().setSelection(resolver);
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
public static List<CommunityModel> getValidCommunities(String baseURL) throws IOException {
|
||||
return MAPPER
|
||||
.readValue(QueryCommunityAPI.communities(baseURL), CommunitySummary.class)
|
||||
.stream()
|
||||
.filter(
|
||||
community -> !community.getStatus().equals("hidden") &&
|
||||
(community.getType().equals("ri") || community.getType().equals("community")))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
|
||||
* it returns for each organization the list of associated communities
|
||||
*/
|
||||
public static CommunityEntityMap getCommunityOrganization(String baseURL) throws IOException {
|
||||
CommunityEntityMap organizationMap = new CommunityEntityMap();
|
||||
getValidCommunities(baseURL)
|
||||
.forEach(community -> {
|
||||
String id = community.getId();
|
||||
try {
|
||||
List<String> associatedOrgs = MAPPER
|
||||
.readValue(
|
||||
QueryCommunityAPI.communityPropagationOrganization(id, baseURL), OrganizationList.class);
|
||||
associatedOrgs.forEach(o -> {
|
||||
if (!organizationMap
|
||||
.keySet()
|
||||
.contains(
|
||||
"20|" + o))
|
||||
organizationMap.put("20|" + o, new ArrayList<>());
|
||||
organizationMap.get("20|" + o).add(community.getId());
|
||||
});
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
|
||||
return organizationMap;
|
||||
}
|
||||
|
||||
public static CommunityEntityMap getCommunityProjects(String baseURL) throws IOException {
|
||||
CommunityEntityMap projectMap = new CommunityEntityMap();
|
||||
|
||||
getValidCommunities(baseURL)
|
||||
.forEach(community -> {
|
||||
int page = -1;
|
||||
int size = 100;
|
||||
ContentModel cm = new ContentModel();
|
||||
do {
|
||||
page++;
|
||||
try {
|
||||
cm = MAPPER
|
||||
.readValue(
|
||||
QueryCommunityAPI
|
||||
.communityProjects(
|
||||
community.getId(), String.valueOf(page), String.valueOf(size), baseURL),
|
||||
ContentModel.class);
|
||||
if (cm.getContent().size() > 0) {
|
||||
cm.getContent().forEach(p -> {
|
||||
if (!projectMap.keySet().contains("40|" + p.getOpenaireId()))
|
||||
projectMap.put("40|" + p.getOpenaireId(), new ArrayList<>());
|
||||
projectMap.get("40|" + p.getOpenaireId()).add(community.getId());
|
||||
});
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
} while (!cm.getLast());
|
||||
});
|
||||
return projectMap;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
|
||||
package eu.dnetlib.dhp.api.model;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonAutoDetect;
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
import com.google.gson.Gson;
|
||||
|
||||
import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
|
||||
|
||||
@JsonAutoDetect
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
public class CommunityContentprovider {
|
||||
private String openaireId;
|
||||
private SelectionConstraints selectioncriteria;
|
||||
|
||||
private String enabled;
|
||||
|
||||
public String getEnabled() {
|
||||
return enabled;
|
||||
}
|
||||
|
||||
public void setEnabled(String enabled) {
|
||||
this.enabled = enabled;
|
||||
}
|
||||
|
||||
public String getOpenaireId() {
|
||||
return openaireId;
|
||||
}
|
||||
|
||||
public void setOpenaireId(final String openaireId) {
|
||||
this.openaireId = openaireId;
|
||||
}
|
||||
|
||||
public SelectionConstraints getSelectioncriteria() {
|
||||
|
||||
return this.selectioncriteria;
|
||||
}
|
||||
|
||||
public void setSelectioncriteria(SelectionConstraints selectioncriteria) {
|
||||
this.selectioncriteria = selectioncriteria;
|
||||
|
||||
}
|
||||
}
|
|
@ -1,13 +1,13 @@
|
|||
|
||||
package eu.dnetlib.dhp.resulttocommunityfromorganization;
|
||||
package eu.dnetlib.dhp.api.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
||||
public class OrganizationMap extends HashMap<String, List<String>> {
|
||||
public class CommunityEntityMap extends HashMap<String, List<String>> {
|
||||
|
||||
public OrganizationMap() {
|
||||
public CommunityEntityMap() {
|
||||
super();
|
||||
}
|
||||
|
|
@ -0,0 +1,108 @@
|
|||
|
||||
package eu.dnetlib.dhp.api.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
|
||||
import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 06/10/23
|
||||
*/
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
public class CommunityModel implements Serializable {
|
||||
private String id;
|
||||
private String type;
|
||||
private String status;
|
||||
|
||||
private String zenodoCommunity;
|
||||
private List<String> subjects;
|
||||
private List<String> otherZenodoCommunities;
|
||||
private List<String> fos;
|
||||
private List<String> sdg;
|
||||
private SelectionConstraints advancedConstraints;
|
||||
private SelectionConstraints removeConstraints;
|
||||
|
||||
public String getZenodoCommunity() {
|
||||
return zenodoCommunity;
|
||||
}
|
||||
|
||||
public void setZenodoCommunity(String zenodoCommunity) {
|
||||
this.zenodoCommunity = zenodoCommunity;
|
||||
}
|
||||
|
||||
public List<String> getSubjects() {
|
||||
return subjects;
|
||||
}
|
||||
|
||||
public void setSubjects(List<String> subjects) {
|
||||
this.subjects = subjects;
|
||||
}
|
||||
|
||||
public List<String> getOtherZenodoCommunities() {
|
||||
return otherZenodoCommunities;
|
||||
}
|
||||
|
||||
public void setOtherZenodoCommunities(List<String> otherZenodoCommunities) {
|
||||
this.otherZenodoCommunities = otherZenodoCommunities;
|
||||
}
|
||||
|
||||
public List<String> getFos() {
|
||||
return fos;
|
||||
}
|
||||
|
||||
public void setFos(List<String> fos) {
|
||||
this.fos = fos;
|
||||
}
|
||||
|
||||
public List<String> getSdg() {
|
||||
return sdg;
|
||||
}
|
||||
|
||||
public void setSdg(List<String> sdg) {
|
||||
this.sdg = sdg;
|
||||
}
|
||||
|
||||
public SelectionConstraints getRemoveConstraints() {
|
||||
return removeConstraints;
|
||||
}
|
||||
|
||||
public void setRemoveConstraints(SelectionConstraints removeConstraints) {
|
||||
this.removeConstraints = removeConstraints;
|
||||
}
|
||||
|
||||
public SelectionConstraints getAdvancedConstraints() {
|
||||
return advancedConstraints;
|
||||
}
|
||||
|
||||
public void setAdvancedConstraints(SelectionConstraints advancedConstraints) {
|
||||
this.advancedConstraints = advancedConstraints;
|
||||
}
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public String getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
public void setType(String type) {
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
public String getStatus() {
|
||||
return status;
|
||||
}
|
||||
|
||||
public void setStatus(String status) {
|
||||
this.status = status;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
|
||||
package eu.dnetlib.dhp.api.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 06/10/23
|
||||
*/
|
||||
public class CommunitySummary extends ArrayList<CommunityModel> implements Serializable {
|
||||
public CommunitySummary() {
|
||||
super();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
|
||||
package eu.dnetlib.dhp.api.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 09/10/23
|
||||
*/
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
public class ContentModel implements Serializable {
|
||||
private List<ProjectModel> content;
|
||||
private Integer totalPages;
|
||||
private Boolean last;
|
||||
private Integer number;
|
||||
|
||||
public List<ProjectModel> getContent() {
|
||||
return content;
|
||||
}
|
||||
|
||||
public void setContent(List<ProjectModel> content) {
|
||||
this.content = content;
|
||||
}
|
||||
|
||||
public Integer getTotalPages() {
|
||||
return totalPages;
|
||||
}
|
||||
|
||||
public void setTotalPages(Integer totalPages) {
|
||||
this.totalPages = totalPages;
|
||||
}
|
||||
|
||||
public Boolean getLast() {
|
||||
return last;
|
||||
}
|
||||
|
||||
public void setLast(Boolean last) {
|
||||
this.last = last;
|
||||
}
|
||||
|
||||
public Integer getNumber() {
|
||||
return number;
|
||||
}
|
||||
|
||||
public void setNumber(Integer number) {
|
||||
this.number = number;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
|
||||
package eu.dnetlib.dhp.api.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import eu.dnetlib.dhp.api.model.CommunityContentprovider;
|
||||
|
||||
public class DatasourceList extends ArrayList<CommunityContentprovider> implements Serializable {
|
||||
public DatasourceList() {
|
||||
super();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
|
||||
package eu.dnetlib.dhp.api.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 09/10/23
|
||||
*/
|
||||
public class OrganizationList extends ArrayList<String> implements Serializable {
|
||||
|
||||
public OrganizationList() {
|
||||
super();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
|
||||
package eu.dnetlib.dhp.api.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 09/10/23
|
||||
*/
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
public class ProjectModel implements Serializable {
|
||||
|
||||
private String openaireId;
|
||||
|
||||
public String getOpenaireId() {
|
||||
return openaireId;
|
||||
}
|
||||
|
||||
public void setOpenaireId(String openaireId) {
|
||||
this.openaireId = openaireId;
|
||||
}
|
||||
}
|
|
@ -9,7 +9,6 @@ import java.util.*;
|
|||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.FilterFunction;
|
||||
import org.apache.spark.api.java.function.ForeachFunction;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
|
@ -21,8 +20,11 @@ import org.slf4j.LoggerFactory;
|
|||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.gson.Gson;
|
||||
|
||||
import eu.dnetlib.dhp.api.Utils;
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.bulktag.community.*;
|
||||
import eu.dnetlib.dhp.schema.common.EntityType;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||
|
@ -54,50 +56,39 @@ public class SparkBulkTagJob {
|
|||
.orElse(Boolean.TRUE);
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
Boolean isTest = Optional
|
||||
.ofNullable(parser.get("isTest"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.FALSE);
|
||||
log.info("isTest: {} ", isTest);
|
||||
|
||||
final String inputPath = parser.get("sourcePath");
|
||||
log.info("inputPath: {}", inputPath);
|
||||
|
||||
final String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath: {}", outputPath);
|
||||
|
||||
final String baseURL = parser.get("baseURL");
|
||||
log.info("baseURL: {}", baseURL);
|
||||
|
||||
ProtoMap protoMappingParams = new Gson().fromJson(parser.get("pathMap"), ProtoMap.class);
|
||||
log.info("pathMap: {}", new Gson().toJson(protoMappingParams));
|
||||
|
||||
final String resultClassName = parser.get("resultTableName");
|
||||
log.info("resultTableName: {}", resultClassName);
|
||||
|
||||
final Boolean saveGraph = Optional
|
||||
.ofNullable(parser.get("saveGraph"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
log.info("saveGraph: {}", saveGraph);
|
||||
|
||||
Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
CommunityConfiguration cc;
|
||||
|
||||
String taggingConf = parser.get("taggingConf");
|
||||
String taggingConf = Optional
|
||||
.ofNullable(parser.get("taggingConf"))
|
||||
.map(String::valueOf)
|
||||
.orElse(null);
|
||||
|
||||
if (isTest) {
|
||||
if (taggingConf != null) {
|
||||
cc = CommunityConfigurationFactory.newInstance(taggingConf);
|
||||
} else {
|
||||
cc = QueryInformationSystem.getCommunityConfiguration(parser.get("isLookUpUrl"));
|
||||
cc = Utils.getCommunityConfiguration(baseURL);
|
||||
log.info(OBJECT_MAPPER.writeValueAsString(cc));
|
||||
}
|
||||
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
removeOutputDir(spark, outputPath);
|
||||
extendCommunityConfigurationForEOSC(spark, inputPath, cc);
|
||||
execBulkTag(spark, inputPath, outputPath, protoMappingParams, resultClazz, cc);
|
||||
execBulkTag(spark, inputPath, outputPath, protoMappingParams, cc);
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -106,10 +97,7 @@ public class SparkBulkTagJob {
|
|||
|
||||
Dataset<String> datasources = readPath(
|
||||
spark, inputPath
|
||||
.substring(
|
||||
0,
|
||||
inputPath.lastIndexOf("/"))
|
||||
+ "/datasource",
|
||||
+ "datasource",
|
||||
Datasource.class)
|
||||
.filter((FilterFunction<Datasource>) ds -> isOKDatasource(ds))
|
||||
.map((MapFunction<Datasource, String>) ds -> ds.getId(), Encoders.STRING());
|
||||
|
@ -117,10 +105,10 @@ public class SparkBulkTagJob {
|
|||
Map<String, List<Pair<String, SelectionConstraints>>> dsm = cc.getEoscDatasourceMap();
|
||||
|
||||
for (String ds : datasources.collectAsList()) {
|
||||
final String dsId = ds.substring(3);
|
||||
if (!dsm.containsKey(dsId)) {
|
||||
// final String dsId = ds.substring(3);
|
||||
if (!dsm.containsKey(ds)) {
|
||||
ArrayList<Pair<String, SelectionConstraints>> eoscList = new ArrayList<>();
|
||||
dsm.put(dsId, eoscList);
|
||||
dsm.put(ds, eoscList);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -142,22 +130,30 @@ public class SparkBulkTagJob {
|
|||
String inputPath,
|
||||
String outputPath,
|
||||
ProtoMap protoMappingParams,
|
||||
Class<R> resultClazz,
|
||||
CommunityConfiguration communityConfiguration) {
|
||||
|
||||
ResultTagger resultTagger = new ResultTagger();
|
||||
readPath(spark, inputPath, resultClazz)
|
||||
.map(patchResult(), Encoders.bean(resultClazz))
|
||||
.filter(Objects::nonNull)
|
||||
.map(
|
||||
(MapFunction<R, R>) value -> resultTagger
|
||||
.enrichContextCriteria(
|
||||
value, communityConfiguration, protoMappingParams),
|
||||
Encoders.bean(resultClazz))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath);
|
||||
ModelSupport.entityTypes
|
||||
.keySet()
|
||||
.parallelStream()
|
||||
.filter(ModelSupport::isResult)
|
||||
.forEach(e -> {
|
||||
removeOutputDir(spark, outputPath + e.name());
|
||||
ResultTagger resultTagger = new ResultTagger();
|
||||
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
|
||||
readPath(spark, inputPath + e.name(), resultClazz)
|
||||
.map(patchResult(), Encoders.bean(resultClazz))
|
||||
.filter(Objects::nonNull)
|
||||
.map(
|
||||
(MapFunction<R, R>) value -> resultTagger
|
||||
.enrichContextCriteria(
|
||||
value, communityConfiguration, protoMappingParams),
|
||||
Encoders.bean(resultClazz))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath + e.name());
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
public static <R> Dataset<R> readPath(
|
||||
|
|
|
@ -4,6 +4,7 @@ package eu.dnetlib.dhp.bulktag.community;
|
|||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
|
||||
|
@ -13,7 +14,7 @@ public class Community implements Serializable {
|
|||
private String id;
|
||||
private List<String> subjects = new ArrayList<>();
|
||||
private List<Provider> providers = new ArrayList<>();
|
||||
private List<ZenodoCommunity> zenodoCommunities = new ArrayList<>();
|
||||
private List<String> zenodoCommunities = new ArrayList<>();
|
||||
private SelectionConstraints constraints = new SelectionConstraints();
|
||||
private SelectionConstraints removeConstraints = new SelectionConstraints();
|
||||
|
||||
|
@ -26,7 +27,7 @@ public class Community implements Serializable {
|
|||
return !getSubjects().isEmpty()
|
||||
|| !getProviders().isEmpty()
|
||||
|| !getZenodoCommunities().isEmpty()
|
||||
|| getConstraints().getCriteria() != null;
|
||||
|| (Optional.ofNullable(getConstraints()).isPresent() && getConstraints().getCriteria() != null);
|
||||
}
|
||||
|
||||
public String getId() {
|
||||
|
@ -53,11 +54,11 @@ public class Community implements Serializable {
|
|||
this.providers = providers;
|
||||
}
|
||||
|
||||
public List<ZenodoCommunity> getZenodoCommunities() {
|
||||
public List<String> getZenodoCommunities() {
|
||||
return zenodoCommunities;
|
||||
}
|
||||
|
||||
public void setZenodoCommunities(List<ZenodoCommunity> zenodoCommunities) {
|
||||
public void setZenodoCommunities(List<String> zenodoCommunities) {
|
||||
this.zenodoCommunities = zenodoCommunities;
|
||||
}
|
||||
|
||||
|
|
|
@ -81,7 +81,7 @@ public class CommunityConfiguration implements Serializable {
|
|||
this.removeConstraintsMap = removeConstraintsMap;
|
||||
}
|
||||
|
||||
CommunityConfiguration(final Map<String, Community> communities) {
|
||||
public CommunityConfiguration(final Map<String, Community> communities) {
|
||||
this.communities = communities;
|
||||
init();
|
||||
}
|
||||
|
@ -117,10 +117,10 @@ public class CommunityConfiguration implements Serializable {
|
|||
add(d.getOpenaireId(), new Pair<>(id, d.getSelectionConstraints()), datasourceMap);
|
||||
}
|
||||
// get zenodo communities
|
||||
for (ZenodoCommunity zc : c.getZenodoCommunities()) {
|
||||
for (String zc : c.getZenodoCommunities()) {
|
||||
add(
|
||||
zc.getZenodoCommunityId(),
|
||||
new Pair<>(id, zc.getSelCriteria()),
|
||||
zc,
|
||||
new Pair<>(id, null),
|
||||
zenodocommunityMap);
|
||||
}
|
||||
selectionConstraintsMap.put(id, c.getConstraints());
|
||||
|
|
|
@ -143,16 +143,16 @@ public class CommunityConfigurationFactory {
|
|||
return providerList;
|
||||
}
|
||||
|
||||
private static List<ZenodoCommunity> parseZenodoCommunities(final Node node) {
|
||||
private static List<String> parseZenodoCommunities(final Node node) {
|
||||
|
||||
final List<Node> list = node.selectNodes("./zenodocommunities/zenodocommunity");
|
||||
final List<ZenodoCommunity> zenodoCommunityList = new ArrayList<>();
|
||||
final List<String> zenodoCommunityList = new ArrayList<>();
|
||||
for (Node n : list) {
|
||||
ZenodoCommunity zc = new ZenodoCommunity();
|
||||
zc.setZenodoCommunityId(n.selectSingleNode("./zenodoid").getText());
|
||||
zc.setSelCriteria(n.selectSingleNode("./selcriteria"));
|
||||
// ZenodoCommunity zc = new ZenodoCommunity();
|
||||
// zc.setZenodoCommunityId(n.selectSingleNode("./zenodoid").getText());
|
||||
// zc.setSelCriteria(n.selectSingleNode("./selcriteria"));
|
||||
|
||||
zenodoCommunityList.add(zc);
|
||||
zenodoCommunityList.add(n.selectSingleNode("./zenodoid").getText());
|
||||
}
|
||||
|
||||
log.info("size of the zenodo community list " + zenodoCommunityList.size());
|
||||
|
|
|
@ -4,6 +4,8 @@ package eu.dnetlib.dhp.bulktag.community;
|
|||
import java.io.Serializable;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
|
||||
import org.apache.htrace.fasterxml.jackson.annotation.JsonIgnore;
|
||||
|
||||
import eu.dnetlib.dhp.bulktag.criteria.Selection;
|
||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||
|
||||
|
@ -12,6 +14,7 @@ public class Constraint implements Serializable {
|
|||
private String field;
|
||||
private String value;
|
||||
// private String element;
|
||||
@JsonIgnore
|
||||
private Selection selection;
|
||||
|
||||
public String getVerb() {
|
||||
|
@ -38,10 +41,11 @@ public class Constraint implements Serializable {
|
|||
this.value = value;
|
||||
}
|
||||
|
||||
public void setSelection(Selection sel) {
|
||||
selection = sel;
|
||||
}
|
||||
|
||||
//@JsonIgnore
|
||||
// public void setSelection(Selection sel) {
|
||||
// selection = sel;
|
||||
// }
|
||||
@JsonIgnore
|
||||
public void setSelection(VerbResolver resolver)
|
||||
throws InvocationTargetException, NoSuchMethodException, InstantiationException,
|
||||
IllegalAccessException {
|
||||
|
@ -52,11 +56,4 @@ public class Constraint implements Serializable {
|
|||
return selection.apply(metadata);
|
||||
}
|
||||
|
||||
// public String getElement() {
|
||||
// return element;
|
||||
// }
|
||||
//
|
||||
// public void setElement(String element) {
|
||||
// this.element = element;
|
||||
// }
|
||||
}
|
||||
|
|
|
@ -1,34 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.bulktag.community;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
import com.google.common.base.Joiner;
|
||||
|
||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
|
||||
public class QueryInformationSystem {
|
||||
|
||||
public static CommunityConfiguration getCommunityConfiguration(final String isLookupUrl)
|
||||
throws ISLookUpException, DocumentException, SAXException, IOException {
|
||||
ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
||||
final List<String> res = isLookUp
|
||||
.quickSearchProfile(
|
||||
IOUtils
|
||||
.toString(
|
||||
QueryInformationSystem.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/bulktag/query.xq")));
|
||||
|
||||
final String xmlConf = "<communities>" + Joiner.on(" ").join(res) + "</communities>";
|
||||
|
||||
return CommunityConfigurationFactory.newInstance(xmlConf);
|
||||
}
|
||||
}
|
|
@ -82,19 +82,23 @@ public class ResultTagger implements Serializable {
|
|||
// communities contains all the communities to be not added to the context
|
||||
final Set<String> removeCommunities = new HashSet<>();
|
||||
|
||||
// if (conf.getRemoveConstraintsMap().keySet().size() > 0)
|
||||
conf
|
||||
.getRemoveConstraintsMap()
|
||||
.keySet()
|
||||
.forEach(communityId -> {
|
||||
if (conf.getRemoveConstraintsMap().get(communityId).getCriteria() != null &&
|
||||
conf
|
||||
.getRemoveConstraintsMap()
|
||||
.get(communityId)
|
||||
.getCriteria()
|
||||
.stream()
|
||||
.anyMatch(crit -> crit.verifyCriteria(param)))
|
||||
removeCommunities.add(communityId);
|
||||
});
|
||||
.forEach(
|
||||
communityId -> {
|
||||
// log.info("Remove constraints for " + communityId);
|
||||
if (conf.getRemoveConstraintsMap().keySet().contains(communityId) &&
|
||||
conf.getRemoveConstraintsMap().get(communityId).getCriteria() != null &&
|
||||
conf
|
||||
.getRemoveConstraintsMap()
|
||||
.get(communityId)
|
||||
.getCriteria()
|
||||
.stream()
|
||||
.anyMatch(crit -> crit.verifyCriteria(param)))
|
||||
removeCommunities.add(communityId);
|
||||
});
|
||||
|
||||
// communities contains all the communities to be added as context for the result
|
||||
final Set<String> communities = new HashSet<>();
|
||||
|
@ -124,10 +128,10 @@ public class ResultTagger implements Serializable {
|
|||
if (Objects.nonNull(result.getInstance())) {
|
||||
for (Instance i : result.getInstance()) {
|
||||
if (Objects.nonNull(i.getCollectedfrom()) && Objects.nonNull(i.getCollectedfrom().getKey())) {
|
||||
collfrom.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|"));
|
||||
collfrom.add(i.getCollectedfrom().getKey());
|
||||
}
|
||||
if (Objects.nonNull(i.getHostedby()) && Objects.nonNull(i.getHostedby().getKey())) {
|
||||
hostdby.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|"));
|
||||
hostdby.add(i.getHostedby().getKey());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -7,11 +7,13 @@ import java.util.Collection;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonAutoDetect;
|
||||
import com.google.gson.Gson;
|
||||
import com.google.gson.reflect.TypeToken;
|
||||
|
||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||
|
||||
@JsonAutoDetect
|
||||
public class SelectionConstraints implements Serializable {
|
||||
private List<Constraints> criteria;
|
||||
|
||||
|
|
|
@ -9,9 +9,7 @@ import java.util.*;
|
|||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.io.compress.GzipCodec;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.FilterFunction;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.api.java.function.MapGroupsFunction;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
|
@ -20,6 +18,8 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
import com.google.gson.Gson;
|
||||
|
||||
import eu.dnetlib.dhp.api.Utils;
|
||||
import eu.dnetlib.dhp.api.model.CommunityEntityMap;
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
@ -48,10 +48,10 @@ public class PrepareResultCommunitySet {
|
|||
final String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath: {}", outputPath);
|
||||
|
||||
final OrganizationMap organizationMap = new Gson()
|
||||
.fromJson(
|
||||
parser.get("organizationtoresultcommunitymap"),
|
||||
OrganizationMap.class);
|
||||
final String baseURL = parser.get("baseURL");
|
||||
log.info("baseURL: {}", baseURL);
|
||||
|
||||
final CommunityEntityMap organizationMap = Utils.getCommunityOrganization(baseURL);
|
||||
log.info("organizationMap: {}", new Gson().toJson(organizationMap));
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
|
@ -70,7 +70,7 @@ public class PrepareResultCommunitySet {
|
|||
SparkSession spark,
|
||||
String inputPath,
|
||||
String outputPath,
|
||||
OrganizationMap organizationMap) {
|
||||
CommunityEntityMap organizationMap) {
|
||||
|
||||
Dataset<Relation> relation = readPath(spark, inputPath, Relation.class);
|
||||
relation.createOrReplaceTempView("relation");
|
||||
|
@ -115,7 +115,7 @@ public class PrepareResultCommunitySet {
|
|||
}
|
||||
|
||||
private static MapFunction<ResultOrganizations, ResultCommunityList> mapResultCommunityFn(
|
||||
OrganizationMap organizationMap) {
|
||||
CommunityEntityMap organizationMap) {
|
||||
return value -> {
|
||||
String rId = value.getResultId();
|
||||
Optional<List<String>> orgs = Optional.ofNullable(value.getMerges());
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
package eu.dnetlib.dhp.resulttocommunityfromorganization;
|
||||
|
||||
import static eu.dnetlib.dhp.PropagationConstant.*;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
@ -22,6 +22,7 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.oaf.Context;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import scala.Tuple2;
|
||||
|
@ -53,29 +54,14 @@ public class SparkResultToCommunityFromOrganizationJob {
|
|||
final String possibleupdatespath = parser.get("preparedInfoPath");
|
||||
log.info("preparedInfoPath: {}", possibleupdatespath);
|
||||
|
||||
final String resultClassName = parser.get("resultTableName");
|
||||
log.info("resultTableName: {}", resultClassName);
|
||||
|
||||
final Boolean saveGraph = Optional
|
||||
.ofNullable(parser.get("saveGraph"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
log.info("saveGraph: {}", saveGraph);
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
|
||||
|
||||
runWithSparkHiveSession(
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
removeOutputDir(spark, outputPath);
|
||||
if (saveGraph) {
|
||||
execPropagation(spark, inputPath, outputPath, resultClazz, possibleupdatespath);
|
||||
}
|
||||
execPropagation(spark, inputPath, outputPath, possibleupdatespath);
|
||||
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -83,22 +69,32 @@ public class SparkResultToCommunityFromOrganizationJob {
|
|||
SparkSession spark,
|
||||
String inputPath,
|
||||
String outputPath,
|
||||
Class<R> resultClazz,
|
||||
String possibleUpdatesPath) {
|
||||
|
||||
Dataset<ResultCommunityList> possibleUpdates = readPath(spark, possibleUpdatesPath, ResultCommunityList.class);
|
||||
Dataset<R> result = readPath(spark, inputPath, resultClazz);
|
||||
|
||||
result
|
||||
.joinWith(
|
||||
possibleUpdates,
|
||||
result.col("id").equalTo(possibleUpdates.col("resultId")),
|
||||
"left_outer")
|
||||
.map(resultCommunityFn(), Encoders.bean(resultClazz))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath);
|
||||
ModelSupport.entityTypes
|
||||
.keySet()
|
||||
.parallelStream()
|
||||
.forEach(e -> {
|
||||
if (ModelSupport.isResult(e)) {
|
||||
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
|
||||
removeOutputDir(spark, outputPath + e.name());
|
||||
Dataset<R> result = readPath(spark, inputPath + e.name(), resultClazz);
|
||||
|
||||
result
|
||||
.joinWith(
|
||||
possibleUpdates,
|
||||
result.col("id").equalTo(possibleUpdates.col("resultId")),
|
||||
"left_outer")
|
||||
.map(resultCommunityFn(), Encoders.bean(resultClazz))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath + e.name());
|
||||
}
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
private static <R extends Result> MapFunction<Tuple2<R, ResultCommunityList>, R> resultCommunityFn() {
|
||||
|
|
|
@ -0,0 +1,123 @@
|
|||
|
||||
package eu.dnetlib.dhp.resulttocommunityfromproject;
|
||||
|
||||
import static eu.dnetlib.dhp.PropagationConstant.*;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.io.compress.GzipCodec;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.api.java.function.MapGroupsFunction;
|
||||
import org.apache.spark.sql.*;
|
||||
import org.apache.spark.sql.types.DataTypes;
|
||||
import org.apache.spark.sql.types.StructType;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
|
||||
import eu.dnetlib.dhp.api.Utils;
|
||||
import eu.dnetlib.dhp.api.model.CommunityEntityMap;
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList;
|
||||
import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultOrganizations;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import scala.Tuple2;
|
||||
|
||||
public class PrepareResultCommunitySet {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(PrepareResultCommunitySet.class);
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
PrepareResultCommunitySet.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json"));
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
parser.parseArgument(args);
|
||||
|
||||
Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
String inputPath = parser.get("sourcePath");
|
||||
log.info("inputPath: {}", inputPath);
|
||||
|
||||
final String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath: {}", outputPath);
|
||||
|
||||
final String baseURL = parser.get("baseURL");
|
||||
log.info("baseURL: {}", baseURL);
|
||||
|
||||
final CommunityEntityMap projectsMap = Utils.getCommunityProjects(baseURL);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
removeOutputDir(spark, outputPath);
|
||||
prepareInfo(spark, inputPath, outputPath, projectsMap);
|
||||
});
|
||||
}
|
||||
|
||||
private static void prepareInfo(
|
||||
SparkSession spark,
|
||||
String inputPath,
|
||||
String outputPath,
|
||||
CommunityEntityMap projectMap) {
|
||||
|
||||
final StructType structureSchema = new StructType()
|
||||
.add(
|
||||
"dataInfo", new StructType()
|
||||
.add("deletedbyinference", DataTypes.BooleanType)
|
||||
.add("invisible", DataTypes.BooleanType))
|
||||
.add("source", DataTypes.StringType)
|
||||
.add("target", DataTypes.StringType)
|
||||
.add("relClass", DataTypes.StringType);
|
||||
|
||||
spark
|
||||
.read()
|
||||
.schema(structureSchema)
|
||||
.json(inputPath)
|
||||
.filter(
|
||||
"dataInfo.deletedbyinference != true " +
|
||||
"and relClass == '" + ModelConstants.IS_PRODUCED_BY + "'")
|
||||
.select(
|
||||
new Column("source").as("resultId"),
|
||||
new Column("target").as("projectId"))
|
||||
.groupByKey((MapFunction<Row, String>) r -> (String) r.getAs("resultId"), Encoders.STRING())
|
||||
.mapGroups((MapGroupsFunction<String, Row, ResultProjectList>) (k, v) -> {
|
||||
ResultProjectList rpl = new ResultProjectList();
|
||||
rpl.setResultId(k);
|
||||
ArrayList<String> cl = new ArrayList<>();
|
||||
cl.addAll(projectMap.get(v.next().getAs("projectId")));
|
||||
v.forEachRemaining(r -> {
|
||||
projectMap
|
||||
.get(r.getAs("projectId"))
|
||||
.forEach(c -> {
|
||||
if (!cl.contains(c))
|
||||
cl.add(c);
|
||||
});
|
||||
|
||||
});
|
||||
if (cl.size() == 0)
|
||||
return null;
|
||||
rpl.setCommunityList(cl);
|
||||
return rpl;
|
||||
}, Encoders.bean(ResultProjectList.class))
|
||||
.filter(Objects::nonNull)
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,26 @@
|
|||
|
||||
package eu.dnetlib.dhp.resulttocommunityfromproject;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
|
||||
public class ResultProjectList implements Serializable {
|
||||
private String resultId;
|
||||
private ArrayList<String> communityList;
|
||||
|
||||
public String getResultId() {
|
||||
return resultId;
|
||||
}
|
||||
|
||||
public void setResultId(String resultId) {
|
||||
this.resultId = resultId;
|
||||
}
|
||||
|
||||
public ArrayList<String> getCommunityList() {
|
||||
return communityList;
|
||||
}
|
||||
|
||||
public void setCommunityList(ArrayList<String> communityList) {
|
||||
this.communityList = communityList;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,163 @@
|
|||
|
||||
package eu.dnetlib.dhp.resulttocommunityfromproject;
|
||||
|
||||
import static eu.dnetlib.dhp.PropagationConstant.*;
|
||||
import static eu.dnetlib.dhp.PropagationConstant.PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList;
|
||||
import eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.oaf.Context;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import scala.Tuple2;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 11/10/23
|
||||
*/
|
||||
public class SparkResultToCommunityFromProject implements Serializable {
|
||||
private static final Logger log = LoggerFactory.getLogger(SparkResultToCommunityFromProject.class);
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
SparkResultToCommunityFromProject.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/resulttocommunityfromproject/input_communitytoresult_parameters.json"));
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
|
||||
parser.parseArgument(args);
|
||||
|
||||
Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
String inputPath = parser.get("sourcePath");
|
||||
log.info("inputPath: {}", inputPath);
|
||||
|
||||
final String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath: {}", outputPath);
|
||||
|
||||
final String possibleupdatespath = parser.get("preparedInfoPath");
|
||||
log.info("preparedInfoPath: {}", possibleupdatespath);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
|
||||
execPropagation(spark, inputPath, outputPath, possibleupdatespath);
|
||||
|
||||
});
|
||||
}
|
||||
|
||||
private static <R extends Result> void execPropagation(
|
||||
SparkSession spark,
|
||||
String inputPath,
|
||||
String outputPath,
|
||||
|
||||
String possibleUpdatesPath) {
|
||||
|
||||
Dataset<ResultProjectList> possibleUpdates = readPath(spark, possibleUpdatesPath, ResultProjectList.class);
|
||||
|
||||
ModelSupport.entityTypes
|
||||
.keySet()
|
||||
.parallelStream()
|
||||
.forEach(e -> {
|
||||
if (ModelSupport.isResult(e)) {
|
||||
removeOutputDir(spark, outputPath + e.name());
|
||||
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
|
||||
Dataset<R> result = readPath(spark, inputPath + e.name(), resultClazz);
|
||||
|
||||
result
|
||||
.joinWith(
|
||||
possibleUpdates,
|
||||
result.col("id").equalTo(possibleUpdates.col("resultId")),
|
||||
"left_outer")
|
||||
.map(resultCommunityFn(), Encoders.bean(resultClazz))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath + e.name());
|
||||
}
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
private static <R extends Result> MapFunction<Tuple2<R, ResultProjectList>, R> resultCommunityFn() {
|
||||
return value -> {
|
||||
R ret = value._1();
|
||||
Optional<ResultProjectList> rcl = Optional.ofNullable(value._2());
|
||||
if (rcl.isPresent()) {
|
||||
// ArrayList<String> communitySet = rcl.get().getCommunityList();
|
||||
List<String> contextList = ret
|
||||
.getContext()
|
||||
.stream()
|
||||
.map(Context::getId)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
R res = (R) ret.getClass().newInstance();
|
||||
|
||||
res.setId(ret.getId());
|
||||
List<Context> propagatedContexts = new ArrayList<>();
|
||||
for (String cId : rcl.get().getCommunityList()) {
|
||||
if (!contextList.contains(cId)) {
|
||||
Context newContext = new Context();
|
||||
newContext.setId(cId);
|
||||
newContext
|
||||
.setDataInfo(
|
||||
Arrays
|
||||
.asList(
|
||||
getDataInfo(
|
||||
PROPAGATION_DATA_INFO_TYPE,
|
||||
PROPAGATION_RESULT_COMMUNITY_PROJECT_CLASS_ID,
|
||||
PROPAGATION_RESULT_COMMUNITY_PROJECT_CLASS_NAME,
|
||||
ModelConstants.DNET_PROVENANCE_ACTIONS)));
|
||||
propagatedContexts.add(newContext);
|
||||
} else {
|
||||
ret
|
||||
.getContext()
|
||||
.stream()
|
||||
.filter(c -> c.getId().equals(cId))
|
||||
.findFirst()
|
||||
.get()
|
||||
.getDataInfo()
|
||||
.add(
|
||||
getDataInfo(
|
||||
PROPAGATION_DATA_INFO_TYPE,
|
||||
PROPAGATION_RESULT_COMMUNITY_PROJECT_CLASS_ID,
|
||||
PROPAGATION_RESULT_COMMUNITY_PROJECT_CLASS_NAME,
|
||||
ModelConstants.DNET_PROVENANCE_ACTIONS));
|
||||
}
|
||||
}
|
||||
res.setContext(propagatedContexts);
|
||||
ret.mergeFrom(res);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
}
|
||||
}
|
|
@ -1,10 +1,4 @@
|
|||
[
|
||||
{
|
||||
"paramName":"is",
|
||||
"paramLongName":"isLookUpUrl",
|
||||
"paramDescription": "URL of the isLookUp Service",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName":"s",
|
||||
"paramLongName":"sourcePath",
|
||||
|
@ -17,12 +11,6 @@
|
|||
"paramDescription": "the json path associated to each selection field",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName":"tn",
|
||||
"paramLongName":"resultTableName",
|
||||
"paramDescription": "the name of the result table we are currently working on",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "out",
|
||||
"paramLongName": "outputPath",
|
||||
|
@ -35,17 +23,16 @@
|
|||
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName": "test",
|
||||
"paramLongName": "isTest",
|
||||
"paramDescription": "Parameter intended for testing purposes only. True if the reun is relatesd to a test and so the taggingConf parameter should be loaded",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName": "tg",
|
||||
"paramLongName": "taggingConf",
|
||||
"paramDescription": "this parameter is intended for testing purposes only. It is a possible tagging configuration obtained via the XQUERY. Intended to be removed",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName": "bu",
|
||||
"paramLongName": "baseURL",
|
||||
"paramDescription": "this parameter is to specify the api to be queried (beta or production)",
|
||||
"paramRequired": false
|
||||
}
|
||||
|
||||
]
|
|
@ -4,10 +4,6 @@
|
|||
<name>sourcePath</name>
|
||||
<description>the source path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>isLookUpUrl</name>
|
||||
<description>the isLookup service endpoint</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>pathMap</name>
|
||||
<description>the json path associated to each selection field</description>
|
||||
|
@ -16,21 +12,10 @@
|
|||
<name>outputPath</name>
|
||||
<description>the output path</description>
|
||||
</property>
|
||||
|
||||
|
||||
<property>
|
||||
<name>postgresURL</name>
|
||||
<description>the url of the postgress server to query</description>
|
||||
<name>baseURL</name>
|
||||
<description>the community API base URL</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>postgresUser</name>
|
||||
<description>the username to access the postgres db</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>postgresPassword</name>
|
||||
<description>the postgres password</description>
|
||||
</property>
|
||||
|
||||
</parameters>
|
||||
|
||||
<global>
|
||||
|
@ -102,20 +87,13 @@
|
|||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="copy_wait" to="fork_exec_bulktag"/>
|
||||
<join name="copy_wait" to="exec_bulktag"/>
|
||||
|
||||
<fork name="fork_exec_bulktag">
|
||||
<path start="bulktag_publication"/>
|
||||
<path start="bulktag_dataset"/>
|
||||
<path start="bulktag_otherresearchproduct"/>
|
||||
<path start="bulktag_software"/>
|
||||
</fork>
|
||||
|
||||
<action name="bulktag_publication">
|
||||
<action name="exec_bulktag">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn-cluster</master>
|
||||
<mode>cluster</mode>
|
||||
<name>bulkTagging-publication</name>
|
||||
<name>bulkTagging-result</name>
|
||||
<class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
|
@ -128,100 +106,15 @@
|
|||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/</arg>
|
||||
<arg>--pathMap</arg><arg>${pathMap}</arg>
|
||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
<arg>--baseURL</arg><arg>${baseURL}</arg>
|
||||
</spark>
|
||||
<ok to="wait"/>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="bulktag_dataset">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn-cluster</master>
|
||||
<mode>cluster</mode>
|
||||
<name>bulkTagging-dataset</name>
|
||||
<class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--num-executors=${sparkExecutorNumber}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
|
||||
<arg>--pathMap</arg><arg>${pathMap}</arg>
|
||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
</spark>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="bulktag_otherresearchproduct">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn-cluster</master>
|
||||
<mode>cluster</mode>
|
||||
<name>bulkTagging-orp</name>
|
||||
<class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--num-executors=${sparkExecutorNumber}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
|
||||
<arg>--pathMap</arg><arg>${pathMap}</arg>
|
||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
</spark>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="bulktag_software">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn-cluster</master>
|
||||
<mode>cluster</mode>
|
||||
<name>bulkTagging-software</name>
|
||||
<class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--num-executors=${sparkExecutorNumber}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
|
||||
<arg>--pathMap</arg><arg>${pathMap}</arg>
|
||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
</spark>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="wait" to="End"/>
|
||||
|
||||
|
||||
<end name="End"/>
|
||||
|
||||
</workflow-app>
|
|
@ -1,62 +0,0 @@
|
|||
for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType')
|
||||
let $subj := $x//CONFIGURATION/context/param[./@name='subject']/text()
|
||||
let $datasources := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::contentproviders')]/concept
|
||||
let $organizations := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::resultorganizations')]/concept
|
||||
let $communities := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::zenodocommunities')]/concept
|
||||
let $fos := $x//CONFIGURATION/context/param[./@name='fos']/text()
|
||||
let $sdg := $x//CONFIGURATION/context/param[./@name='sdg']/text()
|
||||
let $zenodo := $x//param[./@name='zenodoCommunity']/text()
|
||||
where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] and $x//context/param[./@name = 'status']/text() != 'hidden'
|
||||
return
|
||||
<community>
|
||||
{ $x//CONFIGURATION/context/@id}
|
||||
<removeConstraints>
|
||||
{$x//CONFIGURATION/context/param[./@name='removeConstraints']/text() }
|
||||
</removeConstraints>
|
||||
<advancedConstraints>
|
||||
{$x//CONFIGURATION/context/param[./@name='advancedConstraints']/text() }
|
||||
</advancedConstraints>
|
||||
<subjects>
|
||||
{for $y in tokenize($subj,',')
|
||||
return
|
||||
<subject>{$y}</subject>}
|
||||
{for $y in tokenize($fos,',')
|
||||
return
|
||||
<subject>{$y}</subject>}
|
||||
{for $y in tokenize($sdg,',')
|
||||
return
|
||||
<subject>{$y}</subject>}
|
||||
</subjects>
|
||||
<datasources>
|
||||
{for $d in $datasources
|
||||
where $d/param[./@name='enabled']/text()='true'
|
||||
return
|
||||
<datasource>
|
||||
<openaireId>
|
||||
{$d//param[./@name='openaireId']/text()}
|
||||
</openaireId>
|
||||
<selcriteria>
|
||||
{$d/param[./@name='selcriteria']/text()}
|
||||
</selcriteria>
|
||||
</datasource> }
|
||||
</datasources>
|
||||
<zenodocommunities>
|
||||
{for $zc in $zenodo
|
||||
return
|
||||
<zenodocommunity>
|
||||
<zenodoid>
|
||||
{$zc}
|
||||
</zenodoid>
|
||||
</zenodocommunity>}
|
||||
{for $zc in $communities
|
||||
return
|
||||
<zenodocommunity>
|
||||
<zenodoid>
|
||||
{$zc/param[./@name='zenodoid']/text()}
|
||||
</zenodoid>
|
||||
<selcriteria>
|
||||
{$zc/param[./@name='selcriteria']/text()}
|
||||
</selcriteria>
|
||||
</zenodocommunity>}
|
||||
</zenodocommunities>
|
||||
</community>
|
|
@ -5,24 +5,7 @@
|
|||
"paramDescription": "the path of the sequencial file to read",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName":"h",
|
||||
"paramLongName":"hive_metastore_uris",
|
||||
"paramDescription": "the hive metastore uris",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName":"sg",
|
||||
"paramLongName":"saveGraph",
|
||||
"paramDescription": "true if the new version of the graph must be saved",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName":"test",
|
||||
"paramLongName":"isTest",
|
||||
"paramDescription": "true if it is executing a test",
|
||||
"paramRequired": false
|
||||
},
|
||||
|
||||
{
|
||||
"paramName": "out",
|
||||
"paramLongName": "outputPath",
|
||||
|
@ -35,12 +18,6 @@
|
|||
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName":"tn",
|
||||
"paramLongName":"resultTableName",
|
||||
"paramDescription": "the name of the result table we are currently working on",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "p",
|
||||
"paramLongName": "preparedInfoPath",
|
||||
|
|
|
@ -5,12 +5,6 @@
|
|||
"paramDescription": "the path of the sequencial file to read",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName":"ocm",
|
||||
"paramLongName":"organizationtoresultcommunitymap",
|
||||
"paramDescription": "the map for the association organization communities",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName":"h",
|
||||
"paramLongName":"hive_metastore_uris",
|
||||
|
@ -28,6 +22,12 @@
|
|||
"paramLongName": "outputPath",
|
||||
"paramDescription": "the path used to store temporary output files",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "bu",
|
||||
"paramLongName": "baseURL",
|
||||
"paramDescription": "the base URL to the community API to use",
|
||||
"paramRequired": false
|
||||
}
|
||||
|
||||
]
|
|
@ -4,14 +4,14 @@
|
|||
<name>sourcePath</name>
|
||||
<description>the source path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>organizationtoresultcommunitymap</name>
|
||||
<description>organization community map</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>outputPath</name>
|
||||
<description>the output path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>baseURL</name>
|
||||
<description>the community API base URL</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<global>
|
||||
|
@ -93,149 +93,54 @@
|
|||
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.PrepareResultCommunitySet</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=6
|
||||
--executor-memory=5G
|
||||
--conf spark.executor.memoryOverhead=3g
|
||||
--conf spark.sql.shuffle.partitions=3284
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.dynamicAllocation.enabled=true
|
||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--organizationtoresultcommunitymap</arg><arg>${organizationtoresultcommunitymap}</arg>
|
||||
<arg>--baseURL</arg><arg>${baseURL}</arg>
|
||||
</spark>
|
||||
<ok to="fork-join-exec-propagation"/>
|
||||
<ok to="exec-propagation"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<fork name="fork-join-exec-propagation">
|
||||
<path start="join_propagate_publication"/>
|
||||
<path start="join_propagate_dataset"/>
|
||||
<path start="join_propagate_otherresearchproduct"/>
|
||||
<path start="join_propagate_software"/>
|
||||
</fork>
|
||||
|
||||
<action name="join_propagate_publication">
|
||||
<action name="exec-propagation">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>community2resultfromorganization-Publication</name>
|
||||
<name>community2resultfromorganization</name>
|
||||
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=6
|
||||
--executor-memory=5G
|
||||
--conf spark.executor.memoryOverhead=3g
|
||||
--conf spark.sql.shuffle.partitions=3284
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.dynamicAllocation.enabled=true
|
||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/</arg>
|
||||
</spark>
|
||||
<ok to="wait2"/>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="join_propagate_dataset">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>community2resultfromorganization-Dataset</name>
|
||||
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.dynamicAllocation.enabled=true
|
||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
</spark>
|
||||
<ok to="wait2"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="join_propagate_otherresearchproduct">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>community2resultfromorganization-ORP</name>
|
||||
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.dynamicAllocation.enabled=true
|
||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
</spark>
|
||||
<ok to="wait2"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="join_propagate_software">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>community2resultfromorganization-Software</name>
|
||||
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.dynamicAllocation.enabled=true
|
||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
</spark>
|
||||
<ok to="wait2"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="wait2" to="End"/>
|
||||
|
||||
<end name="End"/>
|
||||
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
[
|
||||
{
|
||||
"paramName":"s",
|
||||
"paramLongName":"sourcePath",
|
||||
"paramDescription": "the path of the sequencial file to read",
|
||||
"paramRequired": true
|
||||
},
|
||||
|
||||
{
|
||||
"paramName": "out",
|
||||
"paramLongName": "outputPath",
|
||||
"paramDescription": "the path used to store temporary output files",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "ssm",
|
||||
"paramLongName": "isSparkSessionManaged",
|
||||
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName": "p",
|
||||
"paramLongName": "preparedInfoPath",
|
||||
"paramDescription": "the path where prepared info have been stored",
|
||||
"paramRequired": true
|
||||
}
|
||||
|
||||
]
|
|
@ -0,0 +1,28 @@
|
|||
[
|
||||
{
|
||||
"paramName":"s",
|
||||
"paramLongName":"sourcePath",
|
||||
"paramDescription": "the path of the sequencial file to read",
|
||||
"paramRequired": true
|
||||
},
|
||||
|
||||
{
|
||||
"paramName": "ssm",
|
||||
"paramLongName": "isSparkSessionManaged",
|
||||
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName": "out",
|
||||
"paramLongName": "outputPath",
|
||||
"paramDescription": "the path used to store temporary output files",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "bu",
|
||||
"paramLongName": "baseURL",
|
||||
"paramDescription": "the path used to store temporary output files",
|
||||
"paramRequired": false
|
||||
}
|
||||
|
||||
]
|
|
@ -0,0 +1,58 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hive_metastore_uris</name>
|
||||
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2YarnHistoryServerAddress</name>
|
||||
<value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2EventLogDir</name>
|
||||
<value>/user/spark/spark2ApplicationHistory</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2ExtraListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2SqlQueryExecutionListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorNumber</name>
|
||||
<value>4</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<value>15G</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorMemory</name>
|
||||
<value>6G</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorCores</name>
|
||||
<value>1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2MaxExecutors</name>
|
||||
<value>50</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -0,0 +1,147 @@
|
|||
<workflow-app name="community_to_result_propagation_project" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
<description>the source path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>outputPath</name>
|
||||
<description>the output path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>baseURL</name>
|
||||
<description>the community API base URL</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<global>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>${oozieActionShareLibForSpark2}</value>
|
||||
</property>
|
||||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="reset_outputpath"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="reset_outputpath">
|
||||
<fs>
|
||||
<delete path="${outputPath}"/>
|
||||
<mkdir path="${outputPath}"/>
|
||||
</fs>
|
||||
<ok to="copy_entities"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<fork name="copy_entities">
|
||||
<path start="copy_relation"/>
|
||||
<path start="copy_organization"/>
|
||||
<path start="copy_projects"/>
|
||||
<path start="copy_datasources"/>
|
||||
</fork>
|
||||
|
||||
<action name="copy_relation">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/relation</arg>
|
||||
<arg>${nameNode}/${outputPath}/relation</arg>
|
||||
</distcp>
|
||||
<ok to="copy_wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_organization">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/organization</arg>
|
||||
<arg>${nameNode}/${outputPath}/organization</arg>
|
||||
</distcp>
|
||||
<ok to="copy_wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_projects">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/project</arg>
|
||||
<arg>${nameNode}/${outputPath}/project</arg>
|
||||
</distcp>
|
||||
<ok to="copy_wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_datasources">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
||||
<arg>${nameNode}/${outputPath}/datasource</arg>
|
||||
</distcp>
|
||||
<ok to="copy_wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="copy_wait" to="prepare_result_communitylist"/>
|
||||
|
||||
<action name="prepare_result_communitylist">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Prepare-Community-Result-Organization</name>
|
||||
<class>eu.dnetlib.dhp.resulttocommunityfromproject.PrepareResultCommunitySet</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=6
|
||||
--executor-memory=5G
|
||||
--conf spark.executor.memoryOverhead=3g
|
||||
--conf spark.sql.shuffle.partitions=3284
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
|
||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
||||
<arg>--baseURL</arg><arg>${baseURL}</arg>
|
||||
</spark>
|
||||
<ok to="exec-propagation"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="exec-propagation">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>community2resultfromproject</name>
|
||||
<class>eu.dnetlib.dhp.resulttocommunityfromproject.SparkResultToCommunityFromProject</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=6
|
||||
--executor-memory=5G
|
||||
--conf spark.executor.memoryOverhead=3g
|
||||
--conf spark.sql.shuffle.partitions=3284
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
|
||||
<end name="End"/>
|
||||
|
||||
</workflow-app>
|
|
@ -31,8 +31,6 @@ public class BulkTagJobTest {
|
|||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
public static final String MOCK_IS_LOOK_UP_URL = "BASEURL:8280/is/services/isLookUp";
|
||||
|
||||
public static final String pathMap = "{ \"author\" : \"$['author'][*]['fullname']\","
|
||||
+ " \"title\" : \"$['title'][*]['value']\","
|
||||
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
||||
|
@ -42,7 +40,9 @@ public class BulkTagJobTest {
|
|||
"\"fos\" : \"$['subject'][?(@['qualifier']['classid']=='FOS')].value\"," +
|
||||
"\"sdg\" : \"$['subject'][?(@['qualifier']['classid']=='SDG')].value\"," +
|
||||
"\"hostedby\" : \"$['instance'][*]['hostedby']['key']\" , " +
|
||||
"\"collectedfrom\" : \"$['instance'][*]['collectedfrom']['key']\"} ";
|
||||
"\"collectedfrom\" : \"$['instance'][*]['collectedfrom']['key']\"," +
|
||||
"\"publisher\":\"$['publisher'].value\"," +
|
||||
"\"publicationyear\":\"$['dateofacceptance'].value\"} ";
|
||||
|
||||
private static SparkSession spark;
|
||||
|
||||
|
@ -98,14 +98,11 @@ public class BulkTagJobTest {
|
|||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath",
|
||||
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates").getPath(),
|
||||
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates/").getPath(),
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath", workingDir.toString() + "/dataset",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
"-outputPath", workingDir.toString() + "/",
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
|
@ -133,19 +130,16 @@ public class BulkTagJobTest {
|
|||
@Test
|
||||
void bulktagBySubjectNoPreviousContextTest() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext")
|
||||
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext/")
|
||||
.getPath();
|
||||
final String pathMap = BulkTagJobTest.pathMap;
|
||||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath", workingDir.toString() + "/dataset",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
"-outputPath", workingDir.toString() + "/",
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
|
@ -230,19 +224,19 @@ public class BulkTagJobTest {
|
|||
void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance")
|
||||
"/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance/")
|
||||
.getPath();
|
||||
final String pathMap = BulkTagJobTest.pathMap;
|
||||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath", workingDir.toString() + "/dataset",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
|
||||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
|
@ -311,18 +305,18 @@ public class BulkTagJobTest {
|
|||
@Test
|
||||
void bulktagByDatasourceTest() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource")
|
||||
.getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/")
|
||||
.getPath();
|
||||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
|
||||
"-outputPath", workingDir.toString() + "/publication",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
|
||||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
|
@ -384,25 +378,25 @@ public class BulkTagJobTest {
|
|||
void bulktagByZenodoCommunityTest() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity")
|
||||
"/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity/")
|
||||
.getPath();
|
||||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
|
||||
"-outputPath", workingDir.toString() + "/orp",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
|
||||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<OtherResearchProduct> tmp = sc
|
||||
.textFile(workingDir.toString() + "/orp")
|
||||
.textFile(workingDir.toString() + "/otherresearchproduct")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class));
|
||||
|
||||
Assertions.assertEquals(10, tmp.count());
|
||||
|
@ -505,18 +499,18 @@ public class BulkTagJobTest {
|
|||
@Test
|
||||
void bulktagBySubjectDatasourceTest() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource")
|
||||
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource/")
|
||||
.getPath();
|
||||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath", workingDir.toString() + "/dataset",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
|
||||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
|
@ -539,6 +533,7 @@ public class BulkTagJobTest {
|
|||
+ "where MyD.inferenceprovenance = 'bulktagging'";
|
||||
|
||||
org.apache.spark.sql.Dataset<Row> idExplodeCommunity = spark.sql(query);
|
||||
|
||||
Assertions.assertEquals(7, idExplodeCommunity.count());
|
||||
|
||||
Assertions
|
||||
|
@ -636,14 +631,14 @@ public class BulkTagJobTest {
|
|||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath",
|
||||
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/software/software_10.json.gz").getPath(),
|
||||
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/software/").getPath(),
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
|
||||
"-outputPath", workingDir.toString() + "/software",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
|
||||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
|
@ -732,18 +727,18 @@ public class BulkTagJobTest {
|
|||
|
||||
final String sourcePath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints")
|
||||
"/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints/")
|
||||
.getPath();
|
||||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath", workingDir.toString() + "/dataset",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
|
||||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
|
@ -774,19 +769,19 @@ public class BulkTagJobTest {
|
|||
void bulkTagOtherJupyter() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/eosctag/jupyter/otherresearchproduct")
|
||||
"/eu/dnetlib/dhp/eosctag/jupyter/")
|
||||
.getPath();
|
||||
|
||||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
|
||||
"-outputPath", workingDir.toString() + "/otherresearchproduct",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
|
||||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
|
@ -829,18 +824,18 @@ public class BulkTagJobTest {
|
|||
public void bulkTagDatasetJupyter() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/eosctag/jupyter/dataset")
|
||||
"/eu/dnetlib/dhp/eosctag/jupyter/")
|
||||
.getPath();
|
||||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath", workingDir.toString() + "/dataset",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
|
||||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
|
@ -878,18 +873,18 @@ public class BulkTagJobTest {
|
|||
|
||||
final String sourcePath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/eosctag/jupyter/software")
|
||||
"/eu/dnetlib/dhp/eosctag/jupyter/")
|
||||
.getPath();
|
||||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
|
||||
"-outputPath", workingDir.toString() + "/software",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
|
||||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
|
@ -1096,18 +1091,18 @@ public class BulkTagJobTest {
|
|||
void galaxyOtherTest() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/eosctag/galaxy/otherresearchproduct")
|
||||
"/eu/dnetlib/dhp/eosctag/galaxy/")
|
||||
.getPath();
|
||||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
|
||||
"-outputPath", workingDir.toString() + "/otherresearchproduct",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
|
||||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
|
@ -1214,18 +1209,18 @@ public class BulkTagJobTest {
|
|||
void galaxySoftwareTest() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/eosctag/galaxy/software")
|
||||
"/eu/dnetlib/dhp/eosctag/galaxy/")
|
||||
.getPath();
|
||||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
|
||||
"-outputPath", workingDir.toString() + "/software",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
|
||||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
|
@ -1333,19 +1328,19 @@ public class BulkTagJobTest {
|
|||
void twitterDatasetTest() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/eosctag/twitter/dataset")
|
||||
"/eu/dnetlib/dhp/eosctag/twitter/")
|
||||
.getPath();
|
||||
|
||||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath", workingDir.toString() + "/dataset",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
|
||||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
|
@ -1373,19 +1368,19 @@ public class BulkTagJobTest {
|
|||
void twitterOtherTest() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/eosctag/twitter/otherresearchproduct")
|
||||
"/eu/dnetlib/dhp/eosctag/twitter/")
|
||||
.getPath();
|
||||
|
||||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
|
||||
"-outputPath", workingDir.toString() + "/otherresearchproduct",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
|
||||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
|
@ -1418,19 +1413,19 @@ public class BulkTagJobTest {
|
|||
void twitterSoftwareTest() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/eosctag/twitter/software")
|
||||
"/eu/dnetlib/dhp/eosctag/twitter/")
|
||||
.getPath();
|
||||
|
||||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
|
||||
"-outputPath", workingDir.toString() + "/software",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
|
||||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
|
@ -1455,19 +1450,19 @@ public class BulkTagJobTest {
|
|||
void EoscContextTagTest() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/bulktag/eosc/dataset/dataset_10.json")
|
||||
"/eu/dnetlib/dhp/bulktag/eosc/dataset/")
|
||||
.getPath();
|
||||
|
||||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath", workingDir.toString() + "/dataset",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
|
||||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
|
@ -1533,16 +1528,16 @@ public class BulkTagJobTest {
|
|||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath",
|
||||
getClass()
|
||||
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints")
|
||||
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints/")
|
||||
.getPath(),
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath", workingDir.toString() + "/dataset",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
|
||||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
@ -1568,4 +1563,41 @@ public class BulkTagJobTest {
|
|||
|
||||
}
|
||||
|
||||
@Test
|
||||
void newConfTest() throws Exception {
|
||||
final String pathMap = BulkTagJobTest.pathMap;
|
||||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath",
|
||||
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates/").getPath(),
|
||||
"-outputPath", workingDir.toString() + "/",
|
||||
// "-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-pathMap", pathMap,
|
||||
"-taggingConf", taggingConf
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Dataset> tmp = sc
|
||||
.textFile(workingDir.toString() + "/dataset")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
|
||||
|
||||
Assertions.assertEquals(10, tmp.count());
|
||||
org.apache.spark.sql.Dataset<Dataset> verificationDataset = spark
|
||||
.createDataset(tmp.rdd(), Encoders.bean(Dataset.class));
|
||||
|
||||
verificationDataset.createOrReplaceTempView("dataset");
|
||||
|
||||
String query = "select id, MyT.id community "
|
||||
+ "from dataset "
|
||||
+ "lateral view explode(context) c as MyT "
|
||||
+ "lateral view explode(MyT.datainfo) d as MyD "
|
||||
+ "where MyD.inferenceprovenance = 'bulktagging'";
|
||||
|
||||
Assertions.assertEquals(0, spark.sql(query).count());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -47,7 +47,7 @@ class CommunityConfigurationFactoryTest {
|
|||
sc.setVerb("not_contains");
|
||||
sc.setField("contributor");
|
||||
sc.setValue("DARIAH");
|
||||
sc.setSelection(resolver.getSelectionCriteria(sc.getVerb(), sc.getValue()));
|
||||
sc.setSelection(resolver);// .getSelectionCriteria(sc.getVerb(), sc.getValue()));
|
||||
String metadata = "This work has been partially supported by DARIAH-EU infrastructure";
|
||||
Assertions.assertFalse(sc.verifyCriteria(metadata));
|
||||
}
|
||||
|
|
|
@ -72,15 +72,13 @@ public class ResultToCommunityJobTest {
|
|||
SparkResultToCommunityFromOrganizationJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", getClass()
|
||||
.getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/sample")
|
||||
.getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/sample/")
|
||||
.getPath(),
|
||||
"-hive_metastore_uris", "",
|
||||
"-saveGraph", "true",
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath", workingDir.toString() + "/dataset",
|
||||
|
||||
"-outputPath", workingDir.toString() + "/",
|
||||
"-preparedInfoPath", preparedInfoPath
|
||||
});
|
||||
|
||||
|
|
|
@ -0,0 +1,133 @@
|
|||
|
||||
package eu.dnetlib.dhp.resulttocommunityfromproject;
|
||||
|
||||
import static org.apache.spark.sql.functions.desc;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.orcidtoresultfromsemrel.OrcidPropagationJobTest;
|
||||
import eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob;
|
||||
import eu.dnetlib.dhp.schema.oaf.Context;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
|
||||
public class ResultToCommunityJobTest {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(ResultToCommunityJobTest.class);
|
||||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private static SparkSession spark;
|
||||
|
||||
private static Path workingDir;
|
||||
|
||||
@BeforeAll
|
||||
public static void beforeAll() throws IOException {
|
||||
workingDir = Files.createTempDirectory(ResultToCommunityJobTest.class.getSimpleName());
|
||||
log.info("using work dir {}", workingDir);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
conf.setAppName(ResultToCommunityJobTest.class.getSimpleName());
|
||||
|
||||
conf.setMaster("local[*]");
|
||||
conf.set("spark.driver.host", "localhost");
|
||||
conf.set("hive.metastore.local", "true");
|
||||
conf.set("spark.ui.enabled", "false");
|
||||
conf.set("spark.sql.warehouse.dir", workingDir.toString());
|
||||
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
|
||||
|
||||
spark = SparkSession
|
||||
.builder()
|
||||
.appName(OrcidPropagationJobTest.class.getSimpleName())
|
||||
.config(conf)
|
||||
.getOrCreate();
|
||||
}
|
||||
|
||||
@AfterAll
|
||||
public static void afterAll() throws IOException {
|
||||
FileUtils.deleteDirectory(workingDir.toFile());
|
||||
spark.stop();
|
||||
}
|
||||
|
||||
@Test
|
||||
void testSparkResultToCommunityFromProjectJob() throws Exception {
|
||||
final String preparedInfoPath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/resulttocommunityfromproject/preparedInfo")
|
||||
.getPath();
|
||||
SparkResultToCommunityFromProject
|
||||
.main(
|
||||
new String[] {
|
||||
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", getClass()
|
||||
.getResource("/eu/dnetlib/dhp/resulttocommunityfromproject/sample/")
|
||||
.getPath(),
|
||||
|
||||
"-outputPath", workingDir.toString() + "/",
|
||||
"-preparedInfoPath", preparedInfoPath
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Dataset> tmp = sc
|
||||
.textFile(workingDir.toString() + "/dataset")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
|
||||
|
||||
Assertions.assertEquals(10, tmp.count());
|
||||
/**
|
||||
* {"resultId":"50|57a035e5b1ae::d5be548ca7ae489d762f893be67af52f","communityList":["aurora"]}
|
||||
* {"resultId":"50|57a035e5b1ae::a77232ffca9115fcad51c3503dbc7e3e","communityList":["aurora"]}
|
||||
* {"resultId":"50|57a035e5b1ae::803aaad4decab7e27cd4b52a1931b3a1","communityList":["sdsn-gr"]}
|
||||
* {"resultId":"50|57a035e5b1ae::a02e9e4087bca50687731ae5c765b5e1","communityList":["netherlands"]}
|
||||
*/
|
||||
List<Context> context = tmp
|
||||
.filter(r -> r.getId().equals("50|57a035e5b1ae::d5be548ca7ae489d762f893be67af52f"))
|
||||
.first()
|
||||
.getContext();
|
||||
Assertions.assertTrue(context.stream().anyMatch(c -> containsResultCommunityProject(c)));
|
||||
|
||||
context = tmp
|
||||
.filter(r -> r.getId().equals("50|57a035e5b1ae::a77232ffca9115fcad51c3503dbc7e3e"))
|
||||
.first()
|
||||
.getContext();
|
||||
Assertions.assertTrue(context.stream().anyMatch(c -> containsResultCommunityProject(c)));
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
0, tmp.filter(r -> r.getId().equals("50|57a035e5b1ae::803aaad4decab7e27cd4b52a1931b3a1")).count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
0, tmp.filter(r -> r.getId().equals("50|57a035e5b1ae::a02e9e4087bca50687731ae5c765b5e1")).count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
2, tmp.filter(r -> r.getContext().stream().anyMatch(c -> c.getId().equals("aurora"))).count());
|
||||
|
||||
}
|
||||
|
||||
private static boolean containsResultCommunityProject(Context c) {
|
||||
return c
|
||||
.getDataInfo()
|
||||
.stream()
|
||||
.anyMatch(di -> di.getProvenanceaction().getClassid().equals("result:community:project"));
|
||||
}
|
||||
}
|
|
@ -26,7 +26,7 @@
|
|||
<subjects/>
|
||||
<datasources>
|
||||
<datasource>
|
||||
<openaireId>re3data_____::a507cdacc5bbcc08761c92185dee5cab</openaireId>
|
||||
<openaireId>10|re3data_____::a507cdacc5bbcc08761c92185dee5cab</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
</datasources>
|
||||
|
@ -140,39 +140,39 @@
|
|||
</subjects>
|
||||
<datasources>
|
||||
<datasource>
|
||||
<openaireId>re3data_____::9ebe127e5f3a0bf401875690f3bb6b81</openaireId>
|
||||
<openaireId>10|re3data_____::9ebe127e5f3a0bf401875690f3bb6b81</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::c6cd4b532e12868c1d760a8d7cda6815</openaireId>
|
||||
<openaireId>10|doajarticles::c6cd4b532e12868c1d760a8d7cda6815</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b</openaireId>
|
||||
<openaireId>10|doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::6eb31d13b12bc06bbac06aef63cf33c9</openaireId>
|
||||
<openaireId>10|doajarticles::6eb31d13b12bc06bbac06aef63cf33c9</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::0da84e9dfdc8419576169e027baa8028</openaireId>
|
||||
<openaireId>10|doajarticles::0da84e9dfdc8419576169e027baa8028</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>re3data_____::84e123776089ce3c7a33db98d9cd15a8</openaireId>
|
||||
<openaireId>10|re3data_____::84e123776089ce3c7a33db98d9cd15a8</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>openaire____::c5502a43e76feab55dd00cf50f519125</openaireId>
|
||||
<openaireId>10|openaire____::c5502a43e76feab55dd00cf50f519125</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>re3data_____::a48f09c562b247a9919acfe195549b47</openaireId>
|
||||
<openaireId>10|re3data_____::a48f09c562b247a9919acfe195549b47</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>opendoar____::97275a23ca44226c9964043c8462be96</openaireId>
|
||||
<openaireId>10|opendoar____::97275a23ca44226c9964043c8462be96</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
</datasources>
|
||||
|
@ -287,55 +287,55 @@
|
|||
</subjects>
|
||||
<datasources>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::8cec81178926caaca531afbd8eb5d64c</openaireId>
|
||||
<openaireId>10|doajarticles::8cec81178926caaca531afbd8eb5d64c</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::0f7a7f30b5400615cae1829f3e743982</openaireId>
|
||||
<openaireId>10|doajarticles::0f7a7f30b5400615cae1829f3e743982</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::9740f7f5af3e506d2ad2c215cdccd51a</openaireId>
|
||||
<openaireId>10|doajarticles::9740f7f5af3e506d2ad2c215cdccd51a</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::9f3fbaae044fa33cb7069b72935a3254</openaireId>
|
||||
<openaireId>10|doajarticles::9f3fbaae044fa33cb7069b72935a3254</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::cb67f33eb9819f5c624ce0313957f6b3</openaireId>
|
||||
<openaireId>10|doajarticles::cb67f33eb9819f5c624ce0313957f6b3</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::e21c97cbb7a209afc75703681c462906</openaireId>
|
||||
<openaireId>10|doajarticles::e21c97cbb7a209afc75703681c462906</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::554cde3be9e5c4588b4c4f9f503120cb</openaireId>
|
||||
<openaireId>10|doajarticles::554cde3be9e5c4588b4c4f9f503120cb</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>tubitakulakb::11e22f49e65b9fd11d5b144b93861a1b</openaireId>
|
||||
<openaireId>10|tubitakulakb::11e22f49e65b9fd11d5b144b93861a1b</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::57c5d3837da943e93b28ec4db82ec7a5</openaireId>
|
||||
<openaireId>10|doajarticles::57c5d3837da943e93b28ec4db82ec7a5</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::a186f5ddb8e8c7ecc992ef51cf3315b1</openaireId>
|
||||
<openaireId>10|doajarticles::a186f5ddb8e8c7ecc992ef51cf3315b1</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::e21c97cbb7a209afc75703681c462906</openaireId>
|
||||
<openaireId>10|doajarticles::e21c97cbb7a209afc75703681c462906</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::dca64612dfe0963fffc119098a319957</openaireId>
|
||||
<openaireId>10|doajarticles::dca64612dfe0963fffc119098a319957</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::dd70e44479f0ade25aa106aef3e87a0a</openaireId>
|
||||
<openaireId>10|doajarticles::dd70e44479f0ade25aa106aef3e87a0a</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
</datasources>
|
||||
|
@ -406,27 +406,27 @@
|
|||
</subjects>
|
||||
<datasources>
|
||||
<datasource>
|
||||
<openaireId>re3data_____::5b9bf9171d92df854cf3c520692e9122</openaireId>
|
||||
<openaireId>10|re3data_____::5b9bf9171d92df854cf3c520692e9122</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::c7d3de67dc77af72f6747157441252ec</openaireId>
|
||||
<openaireId>10|doajarticles::c7d3de67dc77af72f6747157441252ec</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>re3data_____::8515794670370f49c1d176c399c714f5</openaireId>
|
||||
<openaireId>10|re3data_____::8515794670370f49c1d176c399c714f5</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::d640648c84b10d425f96f11c3de468f3</openaireId>
|
||||
<openaireId>10|doajarticles::d640648c84b10d425f96f11c3de468f3</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a</openaireId>
|
||||
<openaireId>10|doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>rest________::fb1a3d4523c95e63496e3bc7ba36244b</openaireId>
|
||||
<openaireId>10|rest________::fb1a3d4523c95e63496e3bc7ba36244b</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
</datasources>
|
||||
|
@ -743,27 +743,27 @@
|
|||
</subjects>
|
||||
<datasources>
|
||||
<datasource>
|
||||
<openaireId>opendoar____::1a551829d50f1400b0dab21fdd969c04</openaireId>
|
||||
<openaireId>10|opendoar____::1a551829d50f1400b0dab21fdd969c04</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>opendoar____::49af6c4e558a7569d80eee2e035e2bd7</openaireId>
|
||||
<openaireId>10|opendoar____::49af6c4e558a7569d80eee2e035e2bd7</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>opendoar____::0266e33d3f546cb5436a10798e657d97</openaireId>
|
||||
<openaireId>10|opendoar____::0266e33d3f546cb5436a10798e657d97</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>opendoar____::fd4c2dc64ccb8496e6f1f94c85f30d06</openaireId>
|
||||
<openaireId>10|opendoar____::fd4c2dc64ccb8496e6f1f94c85f30d06</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>opendoar____::41bfd20a38bb1b0bec75acf0845530a7</openaireId>
|
||||
<openaireId>10|opendoar____::41bfd20a38bb1b0bec75acf0845530a7</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>opendoar____::87ae6fb631f7c8a627e8e28785d9992d</openaireId>
|
||||
<openaireId>10|opendoar____::87ae6fb631f7c8a627e8e28785d9992d</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
</datasources>
|
||||
|
@ -983,11 +983,11 @@
|
|||
<subjects/>
|
||||
<datasources>
|
||||
<datasource>
|
||||
<openaireId>opendoar____::7e7757b1e12abcb736ab9a754ffb617a</openaireId>
|
||||
<openaireId>10|opendoar____::7e7757b1e12abcb736ab9a754ffb617a</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]}</selcriteria>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>opendoar____::96da2f590cd7246bbde0051047b0d6f7</openaireId>
|
||||
<openaireId>10|opendoar____::96da2f590cd7246bbde0051047b0d6f7</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]}</selcriteria>
|
||||
</datasource>
|
||||
</datasources>
|
||||
|
@ -1166,87 +1166,87 @@
|
|||
</subjects>
|
||||
<datasources>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::1c5bdf8fca58937894ad1441cca99b76</openaireId>
|
||||
<openaireId>10|doajarticles::1c5bdf8fca58937894ad1441cca99b76</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::b37a634324a45c821687e6e80e6f53b4</openaireId>
|
||||
<openaireId>10|doajarticles::b37a634324a45c821687e6e80e6f53b4</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::4bf64f2a104040e4e055cd9594b2d77c</openaireId>
|
||||
<openaireId>10|doajarticles::4bf64f2a104040e4e055cd9594b2d77c</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::479ca537c12755d1868bbf02938a900c</openaireId>
|
||||
<openaireId>10|doajarticles::479ca537c12755d1868bbf02938a900c</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::55f31df96a60e2309f45b7c265fcf7a2</openaireId>
|
||||
<openaireId>10|doajarticles::55f31df96a60e2309f45b7c265fcf7a2</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::c52a09891a5301f9986ebbfe3761810c</openaireId>
|
||||
<openaireId>10|doajarticles::c52a09891a5301f9986ebbfe3761810c</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::379807bc7f6c71a227ef1651462c414c</openaireId>
|
||||
<openaireId>10|doajarticles::379807bc7f6c71a227ef1651462c414c</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::36069db531a00b85a2e8fb301f4bdc19</openaireId>
|
||||
<openaireId>10|doajarticles::36069db531a00b85a2e8fb301f4bdc19</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::b6a898da311ded96fabf49c520b80d5d</openaireId>
|
||||
<openaireId>10|doajarticles::b6a898da311ded96fabf49c520b80d5d</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::d0753d9180b35a271d8b4a31f449749f</openaireId>
|
||||
<openaireId>10|doajarticles::d0753d9180b35a271d8b4a31f449749f</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::172050a92511838393a3fe237ae47e31</openaireId>
|
||||
<openaireId>10|doajarticles::172050a92511838393a3fe237ae47e31</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::301ed96c62abb160a3e29796efe5c95c</openaireId>
|
||||
<openaireId>10|doajarticles::301ed96c62abb160a3e29796efe5c95c</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::0f4f805b3d842f2c7f1b077c3426fa59</openaireId>
|
||||
<openaireId>10|doajarticles::0f4f805b3d842f2c7f1b077c3426fa59</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::ba73728b84437b8d48ae287b867c7215</openaireId>
|
||||
<openaireId>10|doajarticles::ba73728b84437b8d48ae287b867c7215</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::86faef424d804309ccf45f692523aa48</openaireId>
|
||||
<openaireId>10|doajarticles::86faef424d804309ccf45f692523aa48</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::73bd758fa41671de70964c3ecba013af</openaireId>
|
||||
<openaireId>10|doajarticles::73bd758fa41671de70964c3ecba013af</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::e661fc0bdb24af42b740a08f0ddc6cf4</openaireId>
|
||||
<openaireId>10|doajarticles::e661fc0bdb24af42b740a08f0ddc6cf4</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::a6d3052047d5dbfbd43d95b4afb0f3d7</openaireId>
|
||||
<openaireId>10|doajarticles::a6d3052047d5dbfbd43d95b4afb0f3d7</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::ca61df07089acc53a1569bde6673d82a</openaireId>
|
||||
<openaireId>10|doajarticles::ca61df07089acc53a1569bde6673d82a</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::237dd6f1606600459d0297abd8ed9976</openaireId>
|
||||
<openaireId>10|doajarticles::237dd6f1606600459d0297abd8ed9976</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::fba6191177ede7c51ea1cdf58eae7f8b</openaireId>
|
||||
<openaireId>10|doajarticles::fba6191177ede7c51ea1cdf58eae7f8b</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
</datasources>
|
||||
|
@ -1345,87 +1345,87 @@
|
|||
</subjects>
|
||||
<datasources>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::c6f0ed5fa41e98863e7c73501fe4bd6d</openaireId>
|
||||
<openaireId>10|doajarticles::c6f0ed5fa41e98863e7c73501fe4bd6d</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::ae4c7286c79590f19fdca670156ce816</openaireId>
|
||||
<openaireId>10|doajarticles::ae4c7286c79590f19fdca670156ce816</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::0f664bce92ce953e0c7a92068c46bfb3</openaireId>
|
||||
<openaireId>10|doajarticles::0f664bce92ce953e0c7a92068c46bfb3</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::00017183dc4c858fb77541985323a4ef</openaireId>
|
||||
<openaireId>10|doajarticles::00017183dc4c858fb77541985323a4ef</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::93b306f458cce3d7aaaf58c0a725f4f9</openaireId>
|
||||
<openaireId>10|doajarticles::93b306f458cce3d7aaaf58c0a725f4f9</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::9dbf8fbf3e9fe0fe1fc01e55fbd90bfc</openaireId>
|
||||
<openaireId>10|doajarticles::9dbf8fbf3e9fe0fe1fc01e55fbd90bfc</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::a2bda8785c863279bba4b8f34827b4c9</openaireId>
|
||||
<openaireId>10|doajarticles::a2bda8785c863279bba4b8f34827b4c9</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::019a1fcb42c3fea1c1b689df76330b58</openaireId>
|
||||
<openaireId>10|doajarticles::019a1fcb42c3fea1c1b689df76330b58</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::0daa8281938831e9c82bfed8b55a2975</openaireId>
|
||||
<openaireId>10|doajarticles::0daa8281938831e9c82bfed8b55a2975</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::f67ad6d268162079b3abd51a24468744</openaireId>
|
||||
<openaireId>10|doajarticles::f67ad6d268162079b3abd51a24468744</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::c6f0ed5fa41e98863e7c73501fe4bd6d</openaireId>
|
||||
<openaireId>10|doajarticles::c6f0ed5fa41e98863e7c73501fe4bd6d</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::ad114356e196a4a3d84dda59c720dacd</openaireId>
|
||||
<openaireId>10|doajarticles::ad114356e196a4a3d84dda59c720dacd</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::01e8a54fdecaaf354c67a2dd74ae7d4f</openaireId>
|
||||
<openaireId>10|doajarticles::01e8a54fdecaaf354c67a2dd74ae7d4f</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::449305f096b10a9464449ff2d0e10e06</openaireId>
|
||||
<openaireId>10|doajarticles::449305f096b10a9464449ff2d0e10e06</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::982c0c0ac378256254cce2fa6572bb6c</openaireId>
|
||||
<openaireId>10|doajarticles::982c0c0ac378256254cce2fa6572bb6c</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::49d6ed47138884566ce93cf0ccb12c02</openaireId>
|
||||
<openaireId>10|doajarticles::49d6ed47138884566ce93cf0ccb12c02</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::a98e820dbc2e8ee0fc84ab66f263267c</openaireId>
|
||||
<openaireId>10|doajarticles::a98e820dbc2e8ee0fc84ab66f263267c</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::50b1ce37427b36368f8f0f1317e47f83</openaireId>
|
||||
<openaireId>10|doajarticles::50b1ce37427b36368f8f0f1317e47f83</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::f0ec29b7450b2ac5d0ad45327eeb531a</openaireId>
|
||||
<openaireId>10|doajarticles::f0ec29b7450b2ac5d0ad45327eeb531a</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::d8d421d3b0349a7aaa93758b27a54e84</openaireId>
|
||||
<openaireId>10|doajarticles::d8d421d3b0349a7aaa93758b27a54e84</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>doajarticles::7ffc35ac5133da01d421ccf8af5b70bc</openaireId>
|
||||
<openaireId>10|doajarticles::7ffc35ac5133da01d421ccf8af5b70bc</openaireId>
|
||||
<selcriteria/>
|
||||
</datasource>
|
||||
</datasources>
|
||||
|
@ -1454,81 +1454,81 @@
|
|||
</subjects>
|
||||
<datasources>
|
||||
<datasource>
|
||||
<openaireId>opendoar____::358aee4cc897452c00244351e4d91f69</openaireId>
|
||||
<openaireId>10|opendoar____::358aee4cc897452c00244351e4d91f69</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}}]}
|
||||
</selcriteria>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>re3data_____::7b0ad08687b2c960d5aeef06f811d5e6</openaireId>
|
||||
<openaireId>10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
|
||||
</selcriteria>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>driver______::bee53aa31dc2cbb538c10c2b65fa5824</openaireId>
|
||||
<openaireId>10|driver______::bee53aa31dc2cbb538c10c2b65fa5824</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
|
||||
</selcriteria>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>openaire____::437f4b072b1aa198adcbc35910ff3b98</openaireId>
|
||||
<openaireId>10|openaire____::437f4b072b1aa198adcbc35910ff3b98</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
|
||||
</selcriteria>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>openaire____::081b82f96300b6a6e3d282bad31cb6e2</openaireId>
|
||||
<openaireId>10|openaire____::081b82f96300b6a6e3d282bad31cb6e2</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
|
||||
</selcriteria>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>openaire____::9e3be59865b2c1c335d32dae2fe7b254</openaireId>
|
||||
<openaireId>10|openaire____::9e3be59865b2c1c335d32dae2fe7b254</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
|
||||
</selcriteria>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>opendoar____::8b6dd7db9af49e67306feb59a8bdc52c</openaireId>
|
||||
<openaireId>10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
|
||||
</selcriteria>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>share_______::4719356ec8d7d55d3feb384ce879ad6c</openaireId>
|
||||
<openaireId>10|share_______::4719356ec8d7d55d3feb384ce879ad6c</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
|
||||
</selcriteria>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>share_______::bbd802baad85d1fd440f32a7a3a2c2b1</openaireId>
|
||||
<openaireId>10|share_______::bbd802baad85d1fd440f32a7a3a2c2b1</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
|
||||
</selcriteria>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>opendoar____::6f4922f45568161a8cdf4ad2299f6d23</openaireId>
|
||||
<openaireId>10|opendoar____::6f4922f45568161a8cdf4ad2299f6d23</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
|
||||
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
|
||||
</selcriteria>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>re3data_____::7980778c78fb4cf0fab13ce2159030dc</openaireId>
|
||||
<openaireId>10|re3data_____::7980778c78fb4cf0fab13ce2159030dc</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCov"}]}]}</selcriteria>
|
||||
</datasource>
|
||||
<datasource>
|
||||
<openaireId>re3data_____::978378def740bbf2bfb420de868c460b</openaireId>
|
||||
<openaireId>10|re3data_____::978378def740bbf2bfb420de868c460b</openaireId>
|
||||
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCov"}]}]}</selcriteria>
|
||||
</datasource>
|
||||
</datasources>
|
||||
|
|
File diff suppressed because one or more lines are too long
Binary file not shown.
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue