Merge branch 'beta' into clean_license_publisher

This commit is contained in:
Claudio Atzori 2023-12-08 16:58:27 +01:00
commit 34abd0fc43
207 changed files with 7723 additions and 1128 deletions

View File

@ -63,7 +63,10 @@ public class Vocabulary implements Serializable {
} }
public VocabularyTerm getTermBySynonym(final String syn) { public VocabularyTerm getTermBySynonym(final String syn) {
return getTerm(synonyms.get(syn.toLowerCase())); return Optional
.ofNullable(syn)
.map(s -> getTerm(synonyms.get(s.toLowerCase())))
.orElse(null);
} }
public Qualifier getTermAsQualifier(final String termId) { public Qualifier getTermAsQualifier(final String termId) {

View File

@ -135,6 +135,24 @@ public class VocabularyGroup implements Serializable {
return vocs.get(vocId.toLowerCase()).getSynonymAsQualifier(syn); return vocs.get(vocId.toLowerCase()).getSynonymAsQualifier(syn);
} }
/**
 * Resolves {@code syn} against the vocabulary identified by {@code vocId}: the value is first looked up as a term
 * identifier ({@code getTerm}) and, only when that yields nothing, as a synonym ({@code getTermBySynonym}).
 *
 * @param vocId the identifier of the vocabulary to search
 * @param syn the term identifier or synonym to resolve
 * @return a Qualifier built from the matching term (id, name) and the vocabulary (id, name), or {@code null}
 * when {@code find(vocId)} yields no vocabulary or neither lookup finds a term
 */
public Qualifier lookupTermBySynonym(final String vocId, final String syn) {
	return find(vocId)
		.map(
			vocabulary -> Optional
				.ofNullable(vocabulary.getTerm(syn))
				.map(Optional::of)
				// lazy fallback: the synonym lookup runs only when the direct term lookup found nothing
				.orElseGet(() -> Optional.ofNullable(vocabulary.getTermBySynonym(syn)))
				.map(
					term -> OafMapperUtils
						.qualifier(term.getId(), term.getName(), vocabulary.getId(), vocabulary.getName()))
				.orElse(null))
		.orElse(null);
}
/** /**
* getSynonymAsQualifierCaseSensitive * getSynonymAsQualifierCaseSensitive
* *

View File

@ -119,6 +119,131 @@ public class AuthorMerger {
}); });
} }
/**
 * Builds a simplified, comparable form of a full name: the output of {@code nfd(fullname)} is lower-cased, then
 * every run of non-word characters, combining diacritical marks, punctuation, digits and newlines is replaced
 * (in that order) by a single space, and the result is trimmed.
 *
 * @param fullname the full name to normalize
 * @return the normalized full name
 */
public static String normalizeFullName(final String fullname) {
	// do not compact the regexes in a single expression, would cause StackOverflowError
	// in case of large input strings
	final String[] patterns = {
		"(\\W)+",
		"(\\p{InCombiningDiacriticalMarks})+",
		"(\\p{Punct})+",
		"(\\d)+",
		"(\\n)+"
	};
	String cleaned = nfd(fullname).toLowerCase();
	for (final String pattern : patterns) {
		cleaned = cleaned.replaceAll(pattern, " ");
	}
	return cleaned.trim();
}
/**
 * Selects the author field used for comparisons: the surname when it is not blank, otherwise the full name when
 * it is not blank.
 *
 * @param author the author to inspect
 * @return the surname, the full name, or {@code null} when both are blank
 */
private static String authorFieldToBeCompared(Author author) {
	final String surname = author.getSurname();
	if (StringUtils.isNotBlank(surname)) {
		return surname;
	}
	final String fullname = author.getFullname();
	return StringUtils.isNotBlank(fullname) ? fullname : null;
}
/**
 * This method tries to figure out when two authors are the same in the context
 * of ORCID enrichment.
 *
 * The heuristic, applied to the parsed form of both authors, is:
 * <ol>
 * <li>when at least one of them has no non-blank surname, they match only if both have full names and at least
 * one pair of full names is equal after normalization;</li>
 * <li>when they share a surname (case-insensitive), they match if one of them has no non-blank name or if they
 * share a name (case-insensitive);</li>
 * <li>otherwise they match only if name and surname appear swapped between the two authors.</li>
 * </ol>
 *
 * @param left Author in the OAF entity
 * @param right Author ORCID
 * @return based on a heuristic on the names of the authors if they are the same.
 */
public static boolean checkORCIDSimilarity(final Author left, final Author right) {
	final Person pl = parse(left);
	final Person pr = parse(right);

	final boolean bothHaveSurname = pl.getSurname() != null
		&& pl.getSurname().stream().anyMatch(StringUtils::isNotBlank)
		&& pr.getSurname() != null
		&& pr.getSurname().stream().anyMatch(StringUtils::isNotBlank);

	// If one of them didn't have a surname we verify if they have the fullName not empty
	// and verify if the normalized version is equal
	if (!bothHaveSurname) {
		if (pl.getFullname() == null || pl.getFullname().isEmpty()
			|| pr.getFullname() == null || pr.getFullname().isEmpty()) {
			return false;
		}
		return pl
			.getFullname()
			.stream()
			.anyMatch(
				fl -> pr.getFullname().stream().anyMatch(fr -> normalize(fl).equalsIgnoreCase(normalize(fr))));
	}

	final boolean leftNameMissing = pl.getName() == null || pl.getName().stream().allMatch(StringUtils::isBlank);
	final boolean rightNameMissing = pr.getName() == null || pr.getName().stream().allMatch(StringUtils::isBlank);

	// The Authors have one surname in common
	if (pl.getSurname().stream().anyMatch(sl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(sl)))) {
		// If one of them has only a surname and is the same we can say that they are the same author
		if (leftNameMissing || rightNameMissing) {
			return true;
		}
		// The authors have the same initials of Name in common
		if (pl.getName().stream().anyMatch(nl -> pr.getName().stream().anyMatch(nr -> nr.equalsIgnoreCase(nl)))) {
			return true;
		}
	}

	// The swapped comparison below dereferences both name lists: without a name on either side there is nothing
	// to swap, so the authors cannot match (previously this path could raise a NullPointerException).
	if (pl.getName() == null || pr.getName() == null) {
		return false;
	}

	// Sometimes we noticed that publications have authors written in inverse order Surname, Name
	// We verify if we have an exact match between name and surname
	return pl.getSurname().stream().anyMatch(sl -> pr.getName().stream().anyMatch(nr -> nr.equalsIgnoreCase(sl)))
		&& pl.getName().stream().anyMatch(nl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(nl)));
}
//
/**
 * Enriches the authors of an OAF entity with the pids of the matching ORCID authors.
 *
 * Each base author is compared, via {@link #checkORCIDSimilarity(Author, Author)}, with the ORCID authors not yet
 * consumed; the pids of the first match are added to the base author and that ORCID author is removed from the
 * candidates so that it cannot be assigned twice.
 *
 * @param baseAuthor the Author List in the OAF Entity
 * @param orcidAuthor The list of ORCID Author intersected
 * @return {@code orcidAuthor} when {@code baseAuthor} is null or empty; otherwise {@code baseAuthor}, untouched
 * when {@code orcidAuthor} is null or empty or when a single base author faces more than 10 ORCID authors,
 * enriched in place in every other case
 */
public static List<Author> enrichOrcid(List<Author> baseAuthor, List<Author> orcidAuthor) {

	if (baseAuthor == null || baseAuthor.isEmpty()) {
		return orcidAuthor;
	}

	if (orcidAuthor == null || orcidAuthor.isEmpty()) {
		return baseAuthor;
	}

	if (baseAuthor.size() == 1 && orcidAuthor.size() > 10) {
		return baseAuthor;
	}

	// work on a copy: matched ORCID authors are consumed without altering the caller's list
	final List<Author> candidates = new ArrayList<>(orcidAuthor);

	for (final Author base : baseAuthor) {
		candidates
			.stream()
			.filter(candidate -> checkORCIDSimilarity(base, candidate))
			.findFirst()
			.ifPresent(match -> {
				addPid(base, match.getPid());
				candidates.remove(match);
			});
	}
	return baseAuthor;
}
/**
 * Appends the given pids to the pid list of an author, creating the list when the author has none.
 *
 * @param a the author receiving the pids
 * @param pids the pids to add; {@code null} is tolerated and adds nothing
 */
private static void addPid(final Author a, final List<StructuredProperty> pids) {
	if (a.getPid() == null) {
		a.setPid(new ArrayList<>());
	}
	// a matched author may carry no pid list at all: addAll(null) would raise a NullPointerException
	if (pids != null) {
		a.getPid().addAll(pids);
	}
}
public static String pidToComparableString(StructuredProperty pid) { public static String pidToComparableString(StructuredProperty pid) {
final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase()
: ""; : "";
@ -171,7 +296,7 @@ public class AuthorMerger {
} }
} }
private static String normalize(final String s) { public static String normalize(final String s) {
String[] normalized = nfd(s) String[] normalized = nfd(s)
.toLowerCase() .toLowerCase()
// do not compact the regexes in a single expression, would cause StackOverflowError // do not compact the regexes in a single expression, would cause StackOverflowError

View File

@ -21,10 +21,15 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.EntityType;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import scala.Tuple2; import scala.Tuple2;
/** /**
@ -35,6 +40,12 @@ public class GroupEntitiesSparkJob {
private static final Encoder<OafEntity> OAFENTITY_KRYO_ENC = Encoders.kryo(OafEntity.class); private static final Encoder<OafEntity> OAFENTITY_KRYO_ENC = Encoders.kryo(OafEntity.class);
private ArgumentApplicationParser parser;
public GroupEntitiesSparkJob(ArgumentApplicationParser parser) {
this.parser = parser;
}
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils String jsonConfiguration = IOUtils
@ -51,6 +62,17 @@ public class GroupEntitiesSparkJob {
.orElse(Boolean.TRUE); .orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged); log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String isLookupUrl = parser.get("isLookupUrl");
log.info("isLookupUrl: {}", isLookupUrl);
final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl);
new GroupEntitiesSparkJob(parser).run(isSparkSessionManaged, isLookupService);
}
public void run(Boolean isSparkSessionManaged, ISLookUpService isLookUpService)
throws ISLookUpException {
String graphInputPath = parser.get("graphInputPath"); String graphInputPath = parser.get("graphInputPath");
log.info("graphInputPath: {}", graphInputPath); log.info("graphInputPath: {}", graphInputPath);
@ -60,19 +82,21 @@ public class GroupEntitiesSparkJob {
String outputPath = parser.get("outputPath"); String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath); log.info("outputPath: {}", outputPath);
boolean filterInvisible = Boolean.valueOf(parser.get("filterInvisible")); boolean filterInvisible = Boolean.parseBoolean(parser.get("filterInvisible"));
log.info("filterInvisible: {}", filterInvisible); log.info("filterInvisible: {}", filterInvisible);
SparkConf conf = new SparkConf(); SparkConf conf = new SparkConf();
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
conf.registerKryoClasses(ModelSupport.getOafModelClasses()); conf.registerKryoClasses(ModelSupport.getOafModelClasses());
final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookUpService);
runWithSparkSession( runWithSparkSession(
conf, conf,
isSparkSessionManaged, isSparkSessionManaged,
spark -> { spark -> {
HdfsSupport.remove(checkpointPath, spark.sparkContext().hadoopConfiguration()); HdfsSupport.remove(checkpointPath, spark.sparkContext().hadoopConfiguration());
groupEntities(spark, graphInputPath, checkpointPath, outputPath, filterInvisible); groupEntities(spark, graphInputPath, checkpointPath, outputPath, filterInvisible, vocs);
}); });
} }
@ -81,7 +105,7 @@ public class GroupEntitiesSparkJob {
String inputPath, String inputPath,
String checkpointPath, String checkpointPath,
String outputPath, String outputPath,
boolean filterInvisible) { boolean filterInvisible, VocabularyGroup vocs) {
Dataset<OafEntity> allEntities = spark.emptyDataset(OAFENTITY_KRYO_ENC); Dataset<OafEntity> allEntities = spark.emptyDataset(OAFENTITY_KRYO_ENC);
@ -106,10 +130,14 @@ public class GroupEntitiesSparkJob {
} }
Dataset<?> groupedEntities = allEntities Dataset<?> groupedEntities = allEntities
.groupByKey((MapFunction<OafEntity, String>) OafEntity::getId, Encoders.STRING())
.reduceGroups((ReduceFunction<OafEntity>) (b, a) -> OafMapperUtils.mergeEntities(b, a))
.map( .map(
(MapFunction<Tuple2<String, OafEntity>, Tuple2<String, OafEntity>>) t -> new Tuple2( (MapFunction<OafEntity, OafEntity>) entity -> GraphCleaningFunctions
.applyCoarVocabularies(entity, vocs),
OAFENTITY_KRYO_ENC)
.groupByKey((MapFunction<OafEntity, String>) OafEntity::getId, Encoders.STRING())
.reduceGroups((ReduceFunction<OafEntity>) OafMapperUtils::mergeEntities)
.map(
(MapFunction<Tuple2<String, OafEntity>, Tuple2<String, OafEntity>>) t -> new Tuple2<>(
t._2().getClass().getName(), t._2()), t._2().getClass().getName(), t._2()),
Encoders.tuple(Encoders.STRING(), OAFENTITY_KRYO_ENC)); Encoders.tuple(Encoders.STRING(), OAFENTITY_KRYO_ENC));

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.schema.oaf.utils; package eu.dnetlib.dhp.schema.oaf.utils;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import static eu.dnetlib.dhp.schema.common.ModelConstants.OPENAIRE_META_RESOURCE_TYPE;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance;
import java.net.MalformedURLException; import java.net.MalformedURLException;
@ -889,4 +891,105 @@ public class GraphCleaningFunctions extends CleaningFunctions {
return s; return s;
} }
/**
 * Applies the COAR resource type vocabularies to the instances of a Result and sets its meta resource type.
 * Entities that are not a Result are returned untouched.
 *
 * For each instance: when it has no instance type mappings, one is created from the instance type class name,
 * bound to the OPENAIRE_COAR_RESOURCE_TYPES_3_1 vocabulary. The first mapping accepted by
 * originalResourceType(...) is then resolved against that vocabulary and, when the OPENAIRE_USER_RESOURCE_TYPES
 * vocabulary exists, a further mapping derived from the resolved type code is appended to the instance.
 *
 * @param entity the entity to process; modified in place when it is a Result
 * @param vocs the vocabularies used for the lookups
 * @return the same entity instance received as input
 */
public static OafEntity applyCoarVocabularies(OafEntity entity, VocabularyGroup vocs) {
if (entity instanceof Result) {
final Result result = (Result) entity;
Optional
.ofNullable(result.getInstance())
.ifPresent(
instances -> instances
.forEach(
instance -> {
// no mapping yet: seed one from the instance type class name
// NOTE(review): instance.getInstancetype() is dereferenced without a null check - confirm it is always set
if (Objects.isNull(instance.getInstanceTypeMapping())) {
List<InstanceTypeMapping> mapping = Lists.newArrayList();
mapping
.add(
OafMapperUtils
.instanceTypeMapping(
instance.getInstancetype().getClassname(),
OPENAIRE_COAR_RESOURCE_TYPES_3_1));
instance.setInstanceTypeMapping(mapping);
}
// pick the first mapping that still has to be resolved (see originalResourceType)
Optional<InstanceTypeMapping> optionalItm = instance
.getInstanceTypeMapping()
.stream()
.filter(GraphCleaningFunctions::originalResourceType)
.findFirst();
if (optionalItm.isPresent()) {
InstanceTypeMapping coarItm = optionalItm.get();
// resolve the original type against the COAR vocabulary; code and label are set only on a hit
Optional
.ofNullable(
vocs
.lookupTermBySynonym(
OPENAIRE_COAR_RESOURCE_TYPES_3_1, coarItm.getOriginalType()))
.ifPresent(type -> {
coarItm.setTypeCode(type.getClassid());
coarItm.setTypeLabel(type.getClassname());
});
final List<InstanceTypeMapping> mappings = Lists.newArrayList();
// NOTE(review): when the COAR lookup above found nothing, coarItm.getTypeCode() is still blank here
// and is passed as-is to the lookup below - confirm lookupTermBySynonym tolerates a null synonym
if (vocs.vocabularyExists(OPENAIRE_USER_RESOURCE_TYPES)) {
Optional
.ofNullable(
vocs
.lookupTermBySynonym(
OPENAIRE_USER_RESOURCE_TYPES, coarItm.getTypeCode()))
.ifPresent(
type -> mappings
.add(
OafMapperUtils
.instanceTypeMapping(coarItm.getTypeCode(), type)));
}
if (!mappings.isEmpty()) {
instance.getInstanceTypeMapping().addAll(mappings);
}
}
}));
// computed after the instances have been processed, so it sees the resolved type codes
result.setMetaResourceType(getMetaResourceType(result.getInstance(), vocs));
}
return entity;
}
/**
 * Tells whether a mapping still has to be resolved against the COAR resource types vocabulary: it carries a
 * non-blank original type, is bound to OPENAIRE_COAR_RESOURCE_TYPES_3_1 and has neither a type code nor a type
 * label.
 *
 * @param itm the instance type mapping to check
 * @return true when the mapping is an unresolved COAR mapping
 */
private static boolean originalResourceType(InstanceTypeMapping itm) {
	if (StringUtils.isBlank(itm.getOriginalType())) {
		return false;
	}
	if (!OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(itm.getVocabularyName())) {
		return false;
	}
	return StringUtils.isBlank(itm.getTypeCode()) && StringUtils.isBlank(itm.getTypeLabel());
}
/**
 * Derives the meta resource type from the first instance type mapping bound to the
 * OPENAIRE_COAR_RESOURCE_TYPES_3_1 vocabulary found among the given instances.
 *
 * @param instances the instances to inspect, may be null
 * @param vocs the vocabularies used for the lookup
 * @return the term of the OPENAIRE_META_RESOURCE_TYPE vocabulary matching the type code of that mapping, or
 * {@code null} when {@code instances} is null, no such mapping exists or its type code is null
 * @throws IllegalStateException when the OPENAIRE_META_RESOURCE_TYPE vocabulary is not available, or when it
 * has no term matching the type code
 */
private static Qualifier getMetaResourceType(final List<Instance> instances, final VocabularyGroup vocs) {
	if (instances == null) {
		return null;
	}
	if (!vocs.vocabularyExists(OPENAIRE_META_RESOURCE_TYPE)) {
		throw new IllegalStateException("vocabulary '" + OPENAIRE_META_RESOURCE_TYPE + "' not available");
	}

	final String typeCode = instances
		.stream()
		.filter(Objects::nonNull)
		.map(Instance::getInstanceTypeMapping)
		.filter(Objects::nonNull)
		.flatMap(Collection::stream)
		.filter(t -> OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(t.getVocabularyName()))
		.findFirst()
		.map(InstanceTypeMapping::getTypeCode)
		.orElse(null);

	if (typeCode == null) {
		return null;
	}

	final Qualifier metaType = vocs.lookupTermBySynonym(OPENAIRE_META_RESOURCE_TYPE, typeCode);
	if (metaType == null) {
		throw new IllegalStateException(
			"unable to find a synonym for '" + typeCode + "' in " +
				OPENAIRE_META_RESOURCE_TYPE);
	}
	return metaType;
}
} }

View File

@ -14,7 +14,6 @@ import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.schema.common.AccessRightComparator; import eu.dnetlib.dhp.schema.common.AccessRightComparator;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
@ -141,6 +140,28 @@ public class OafMapperUtils {
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
/**
 * Creates an InstanceTypeMapping with all of its fields set from the arguments.
 *
 * @param originalType the original type
 * @param code the type code
 * @param label the type label
 * @param vocabularyName the name of the vocabulary the mapping is bound to
 * @return the new mapping
 */
public static InstanceTypeMapping instanceTypeMapping(String originalType, String code, String label,
	String vocabularyName) {
	final InstanceTypeMapping mapping = new InstanceTypeMapping();
	mapping.setOriginalType(originalType);
	mapping.setTypeCode(code);
	mapping.setTypeLabel(label);
	mapping.setVocabularyName(vocabularyName);
	return mapping;
}
/**
 * Creates an InstanceTypeMapping whose code, label and vocabulary name are taken from the class id, class name
 * and scheme id of the given term.
 *
 * @param originalType the original type
 * @param term the term providing code, label and vocabulary name
 * @return the new mapping
 */
public static InstanceTypeMapping instanceTypeMapping(String originalType, Qualifier term) {
	final String code = term.getClassid();
	final String label = term.getClassname();
	final String vocabularyName = term.getSchemeid();
	return instanceTypeMapping(originalType, code, label, vocabularyName);
}
/**
 * Creates an InstanceTypeMapping carrying only the original type; code, label and vocabulary name are left null.
 *
 * @param originalType the original type
 * @return the new mapping
 */
public static InstanceTypeMapping instanceTypeMapping(String originalType) {
	final String unset = null;
	return instanceTypeMapping(originalType, unset, unset, unset);
}
/**
 * Creates an InstanceTypeMapping carrying the original type and the vocabulary name; code and label are left
 * null.
 *
 * @param originalType the original type
 * @param vocabularyName the name of the vocabulary the mapping is bound to
 * @return the new mapping
 */
public static InstanceTypeMapping instanceTypeMapping(String originalType, String vocabularyName) {
	final String unresolvedCode = null;
	final String unresolvedLabel = null;
	return instanceTypeMapping(originalType, unresolvedCode, unresolvedLabel, vocabularyName);
}
public static Qualifier unknown(final String schemeid, final String schemename) { public static Qualifier unknown(final String schemeid, final String schemename) {
return qualifier(UNKNOWN, "Unknown", schemeid, schemename); return qualifier(UNKNOWN, "Unknown", schemeid, schemename);
} }

View File

@ -28,5 +28,11 @@
"paramLongName": "filterInvisible", "paramLongName": "filterInvisible",
"paramDescription": "if true filters out invisible entities", "paramDescription": "if true filters out invisible entities",
"paramRequired": true "paramRequired": true
},
{
"paramName": "isu",
"paramLongName": "isLookupUrl",
"paramDescription": "url to the ISLookup Service",
"paramRequired": true
} }
] ]

View File

@ -0,0 +1,114 @@
package eu.dnetlib.oa.merge;
import static org.junit.jupiter.api.Assertions.*;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.List;
import java.util.Objects;
import org.junit.jupiter.api.Test;
import org.junit.platform.commons.util.StringUtils;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.merge.AuthorMerger;
import eu.dnetlib.dhp.schema.oaf.Author;
/**
 * Tests for the ORCID enrichment helpers of {@link AuthorMerger}.
 */
public class AuthorMergerTest {

	private static final String PUBLICATION_SAMPLE = "/eu/dnetlib/dhp/oa/merge/authors_publication_sample.json";

	private static final String ORCID_SAMPLE = "/eu/dnetlib/dhp/oa/merge/authors_orcid_sample.json";

	/**
	 * Runs the enrichment over the sample files and checks that it never reduces the number of authors carrying
	 * an ORCID pid. Lines are consumed in this order: publication id, matched ORCID pid, ORCID id, publication
	 * authors and, only when the latter is not blank, ORCID authors.
	 */
	@Test
	public void testEnrcichAuthor() throws Exception {
		final ObjectMapper mapper = new ObjectMapper();
		final TypeReference<List<Author>> aclass = new TypeReference<List<Author>>() {
		};

		// both readers are closed even when an assertion or a parsing step fails
		try (BufferedReader pr = openResource(PUBLICATION_SAMPLE);
			BufferedReader or = openResource(ORCID_SAMPLE)) {

			String pubLine;
			while ((pubLine = pr.readLine()) != null) {
				final String pubId = pubLine;
				final String MatchPidOrcid = or.readLine();
				final String pubOrcid = or.readLine();

				final String data = pr.readLine();

				if (StringUtils.isNotBlank(data)) {
					List<Author> publicationAuthors = mapper.readValue(data, aclass);
					List<Author> orcidAuthors = mapper.readValue(or.readLine(), aclass);
					System.out.printf("OAF ID = %s \n", pubId);
					System.out.printf("ORCID Intersected ID = %s \n", pubOrcid);
					System.out.printf("OAF Author Size = %d \n", publicationAuthors.size());
					System.out.printf("Oricd Author Size = %d \n", orcidAuthors.size());
					System.out.printf("Oricd Matched PID = %s \n", MatchPidOrcid);

					long originalAuthorWithPiD = countAuthorsWithOrcid(publicationAuthors);
					long start = System.currentTimeMillis();

					final List<Author> enrichedList = AuthorMerger.enrichOrcid(publicationAuthors, orcidAuthors);

					long enrichedAuthorWithPid = countAuthorsWithOrcid(enrichedList);

					long totalTime = (System.currentTimeMillis() - start) / 1000;
					System.out
						.printf(
							"Enriched authors in %d seconds from %d pid to %d pid \n", totalTime,
							originalAuthorWithPiD,
							enrichedAuthorWithPid);

					// enrichment only adds pids, so the count can never go down
					assertTrue(enrichedAuthorWithPid >= originalAuthorWithPiD);

					System.out.println("=================");
				}
			}
		}
	}

	/**
	 * Authors written as "Surname, Name" and "Name, Surname" must be recognised as the same person.
	 */
	@Test
	public void checkSimilarityTest() {
		final Author left = new Author();
		left.setName("Anand");
		left.setSurname("Rachna");

		left.setFullname("Anand, Rachna");

		System.out.println(AuthorMerger.normalizeFullName(left.getFullname()));

		final Author right = new Author();
		right.setName("Rachna");
		right.setSurname("Anand");
		right.setFullname("Rachna, Anand");
		boolean same = AuthorMerger.checkORCIDSimilarity(left, right);

		assertTrue(same);
	}

	/** Opens a classpath resource as a UTF-8 reader, failing fast when the resource is missing. */
	private static BufferedReader openResource(final String path) {
		return new BufferedReader(
			new InputStreamReader(
				Objects.requireNonNull(AuthorMergerTest.class.getResourceAsStream(path)),
				java.nio.charset.StandardCharsets.UTF_8));
	}

	/** Counts the authors having at least one pid whose qualifier class id contains "orcid". */
	private static long countAuthorsWithOrcid(final List<Author> authors) {
		return authors
			.stream()
			.filter(
				a -> a.getPid() != null && a
					.getPid()
					.stream()
					.anyMatch(
						p -> p.getQualifier() != null
							&& p.getQualifier().getClassid().toLowerCase().contains("orcid")))
			.count();
	}
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,102 @@
package eu.dnetlib.dhp.collection.orcid;
import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.Objects;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
/**
 * Downloads the files listed by a JSON API descriptor into a target directory of the configured file system.
 * The descriptor is expected to expose a {@code files} array whose items carry a {@code name} and a
 * {@code download_url}.
 */
public class DownloadORCIDDumpApplication {
	private static final Logger log = LoggerFactory.getLogger(DownloadORCIDDumpApplication.class);

	private final FileSystem fileSystem;

	public DownloadORCIDDumpApplication(FileSystem fileSystem) {
		this.fileSystem = fileSystem;
	}

	public static void main(String[] args) throws Exception {
		final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(
			IOUtils
				.toString(
					Objects
						.requireNonNull(
							DownloadORCIDDumpApplication.class
								.getResourceAsStream(
									"/eu/dnetlib/dhp/collection/orcid/download_orcid_parameter.json"))));
		argumentParser.parseArgument(args);

		final String hdfsuri = argumentParser.get("namenode");
		log.info("hdfsURI is {}", hdfsuri);

		final String targetPath = argumentParser.get("targetPath");
		log.info("targetPath is {}", targetPath);

		final String apiURL = argumentParser.get("apiURL");
		log.info("apiURL is {}", apiURL);

		final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(hdfsuri));

		new DownloadORCIDDumpApplication(fileSystem).run(targetPath, apiURL);
	}

	/**
	 * Downloads a single item into {@code basePath/name}, overwriting an existing file.
	 *
	 * @param name the name of the file to create under {@code basePath}
	 * @param itemURL the URL to download
	 * @param basePath the target directory
	 * @throws RuntimeException wrapping any failure raised while creating the file or downloading the content
	 */
	private void downloadItem(final String name, final String itemURL, final String basePath) {
		final int timeout = 60; // seconds
		final RequestConfig config = RequestConfig
			.custom()
			.setConnectTimeout(timeout * 1000)
			.setConnectionRequestTimeout(timeout * 1000)
			.setSocketTimeout(timeout * 1000)
			.build();

		try {
			final Path hdfsWritePath = new Path(String.format("%s/%s", basePath, name));
			log.info("Downloading url {} into {}", itemURL, hdfsWritePath.getName());

			// the output stream is a managed resource: it must be closed to flush the content and release the
			// file, also when the download fails half way
			try (FSDataOutputStream fsDataOutputStream = fileSystem.create(hdfsWritePath, true);
				CloseableHttpClient client = HttpClientBuilder.create().setDefaultRequestConfig(config).build();
				CloseableHttpResponse response = client.execute(new HttpGet(itemURL))) {

				final int responseCode = response.getStatusLine().getStatusCode();
				log.info("Response code is {}", responseCode);
				if (responseCode >= 200 && responseCode < 400) {
					IOUtils.copy(response.getEntity().getContent(), fsDataOutputStream);
				} else {
					// the item is skipped as before, but no longer silently
					log.warn("Nothing downloaded from {}: unexpected response code {}", itemURL, responseCode);
				}
			}
		} catch (Exception e) {
			throw new RuntimeException("Unable to download " + itemURL + " into " + basePath + "/" + name, e);
		}
	}

	/**
	 * Reads the JSON descriptor served by {@code apiURL} and downloads every item of its {@code files} array into
	 * {@code targetPath}.
	 *
	 * @param targetPath the directory receiving the downloaded files
	 * @param apiURL the URL of the JSON descriptor
	 * @throws IllegalStateException when the descriptor has no {@code files} field
	 * @throws Exception when the descriptor cannot be retrieved or parsed
	 */
	protected void run(final String targetPath, final String apiURL) throws Exception {
		final ObjectMapper mapper = new ObjectMapper();
		final URLConnection conn = new URL(apiURL).openConnection();

		final JsonNode jsonNode;
		// the parser reads the bytes directly, so the JSON encoding is detected instead of relying on the
		// platform default charset; the connection stream is always closed
		try (InputStream is = conn.getInputStream()) {
			jsonNode = mapper.readTree(is);
		}

		final JsonNode files = jsonNode == null ? null : jsonNode.get("files");
		if (files == null) {
			throw new IllegalStateException("No 'files' field in the response of " + apiURL);
		}

		files.forEach(i -> downloadItem(i.get("name").asText(), i.get("download_url").asText(), targetPath));
	}
}

View File

@ -0,0 +1,71 @@
package eu.dnetlib.dhp.collection.orcid;
import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
/**
 * Extracts every {@code tar.gz} archive found in a source directory, running one {@link ORCIDExtractor} thread
 * per archive and waiting for all of them to finish.
 */
public class ExtractORCIDDump {
	private static final Logger log = LoggerFactory.getLogger(ExtractORCIDDump.class);

	private final FileSystem fileSystem;

	public ExtractORCIDDump(FileSystem fileSystem) {
		this.fileSystem = fileSystem;
	}

	public static void main(String[] args) throws Exception {
		final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(
			IOUtils
				.toString(
					Objects
						.requireNonNull(
							ExtractORCIDDump.class
								.getResourceAsStream(
									"/eu/dnetlib/dhp/collection/orcid/extract_orcid_parameter.json"))));
		argumentParser.parseArgument(args);

		final String hdfsuri = argumentParser.get("namenode");
		log.info("hdfsURI is {}", hdfsuri);

		final String sourcePath = argumentParser.get("sourcePath");
		log.info("sourcePath is {}", sourcePath);

		final String targetPath = argumentParser.get("targetPath");
		log.info("targetPath is {}", targetPath);

		final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(hdfsuri));

		new ExtractORCIDDump(fileSystem).run(sourcePath, targetPath);
	}

	/**
	 * Starts one extractor per {@code tar.gz} file listed (non recursively) under {@code sourcePath} and waits
	 * for all of them.
	 *
	 * @param sourcePath the directory containing the archives
	 * @param targetPath the base path handed to the extractors for their output
	 * @throws IOException when the source directory cannot be listed
	 * @throws InterruptedException when the wait for the extractors is interrupted
	 * @throws IllegalStateException when at least one extractor terminated with an error
	 */
	public void run(final String sourcePath, final String targetPath) throws IOException, InterruptedException {
		final RemoteIterator<LocatedFileStatus> ls = fileSystem.listFiles(new Path(sourcePath), false);
		final List<ORCIDExtractor> workers = new ArrayList<>();

		// an exception escaping a worker only terminates that thread: it is recorded here so that the job does
		// not complete successfully with a partial extraction
		final java.util.concurrent.atomic.AtomicReference<Throwable> firstFailure = new java.util.concurrent.atomic.AtomicReference<>();

		int i = 0;
		while (ls.hasNext()) {
			final LocatedFileStatus current = ls.next();
			if (current.getPath().getName().endsWith("tar.gz")) {
				final ORCIDExtractor worker = new ORCIDExtractor(fileSystem, String.valueOf(i++), current.getPath(),
					targetPath);
				worker.setUncaughtExceptionHandler((thread, error) -> {
					log.error("Extraction thread {} failed", thread.getName(), error);
					firstFailure.compareAndSet(null, error);
				});
				workers.add(worker);
			}
		}
		workers.forEach(Thread::start);
		for (ORCIDExtractor worker : workers) {
			worker.join();
		}

		final Throwable failure = firstFailure.get();
		if (failure != null) {
			throw new IllegalStateException("The extraction of at least one archive of " + sourcePath + " failed",
				failure);
		}
	}
}

View File

@ -0,0 +1,171 @@
package eu.dnetlib.dhp.collection.orcid;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**\
* The ORCIDExtractor class extracts ORCID data from a TAR archive.
* The class creates a map of SequenceFile.Writer objects, one for each type of data that is to be extracted (e.g., employments, works, summaries).
* Then, it iterates over the TAR archive and writes each entry to the appropriate SequenceFile.Writer object.
* Finally, it closes all the SequenceFile.Writer objects.
*/
public class ORCIDExtractor extends Thread {
private static final Logger log = LoggerFactory.getLogger(ORCIDExtractor.class);
private final FileSystem fileSystem;
private final String id;
private final Path sourcePath;
private final String baseOutputPath;
public ORCIDExtractor(FileSystem fileSystem, String id, Path sourcePath, String baseOutputPath) {
this.fileSystem = fileSystem;
this.id = id;
this.sourcePath = sourcePath;
this.baseOutputPath = baseOutputPath;
}
/**
* creates a map of SequenceFile.Writer objects,
* one for each type of data that is to be extracted. The map is created based on the filename in the TAR archive.
* For example, if the filename is employments.json, the map will contain an entry for the SequenceFile.Writer
* object that writes employment data.
* @return the Map
*/
private Map<String, SequenceFile.Writer> createMap() {
try {
log.info("Thread {} Creating sequence files starting from this input Path {}", id, sourcePath.getName());
Map<String, SequenceFile.Writer> res = new HashMap<>();
if (sourcePath.getName().contains("summaries")) {
final String summaryPath = String.format("%s/summaries_%s", baseOutputPath, id);
final SequenceFile.Writer summary_file = SequenceFile
.createWriter(
fileSystem.getConf(),
SequenceFile.Writer.file(new Path(summaryPath)),
SequenceFile.Writer.keyClass(Text.class),
SequenceFile.Writer.valueClass(Text.class));
log.info("Thread {} Creating only summary path here {}", id, summaryPath);
res.put("summary", summary_file);
return res;
} else {
String employmentsPath = String.format("%s/employments_%s", baseOutputPath, id);
final SequenceFile.Writer employments_file = SequenceFile
.createWriter(
fileSystem.getConf(),
SequenceFile.Writer.file(new Path(employmentsPath)),
SequenceFile.Writer.keyClass(Text.class),
SequenceFile.Writer.valueClass(Text.class));
res.put("employments", employments_file);
log.info("Thread {} Creating employments path here {}", id, employmentsPath);
final String worksPath = String.format("%s/works_%s", baseOutputPath, id);
final SequenceFile.Writer works_file = SequenceFile
.createWriter(
fileSystem.getConf(),
SequenceFile.Writer.file(new Path(worksPath)),
SequenceFile.Writer.keyClass(Text.class),
SequenceFile.Writer.valueClass(Text.class));
res.put("works", works_file);
log.info("Thread {} Creating works path here {}", id, worksPath);
return res;
}
} catch (Throwable e) {
throw new RuntimeException(e);
}
}
@Override
public void run() {
CompressionCodecFactory factory = new CompressionCodecFactory(fileSystem.getConf());
CompressionCodec codec = factory.getCodec(sourcePath);
if (codec == null) {
System.err.println("No codec found for " + sourcePath.getName());
System.exit(1);
}
InputStream gzipInputStream = null;
try {
gzipInputStream = codec.createInputStream(fileSystem.open(sourcePath));
final Map<String, SequenceFile.Writer> fileMap = createMap();
iterateTar(fileMap, gzipInputStream);
} catch (IOException e) {
throw new RuntimeException(e);
} finally {
log.info("Closing gzip stream");
IOUtils.closeStream(gzipInputStream);
}
}
private SequenceFile.Writer retrieveFile(Map<String, SequenceFile.Writer> fileMap, final String path) {
if (sourcePath.getName().contains("summaries")) {
return fileMap.get("summary");
}
if (path.contains("works")) {
return fileMap.get("works");
}
if (path.contains("employments"))
return fileMap.get("employments");
return null;
}
private void iterateTar(Map<String, SequenceFile.Writer> fileMap, InputStream gzipInputStream) throws IOException {
int extractedItem = 0;
try (final TarArchiveInputStream tais = new TarArchiveInputStream(gzipInputStream)) {
TarArchiveEntry entry;
while ((entry = tais.getNextTarEntry()) != null) {
if (entry.isFile()) {
final SequenceFile.Writer fl = retrieveFile(fileMap, entry.getName());
if (fl != null) {
final Text key = new Text(entry.getName());
final Text value = new Text(
org.apache.commons.io.IOUtils.toString(new BufferedReader(new InputStreamReader(tais))));
fl.append(key, value);
extractedItem++;
if (extractedItem % 100000 == 0) {
log.info("Thread {}: Extracted {} items", id, extractedItem);
break;
}
}
}
}
} finally {
for (SequenceFile.Writer k : fileMap.values()) {
log.info("Thread {}: Completed processed {} items", id, extractedItem);
k.hflush();
k.close();
}
}
}
}

View File

@ -0,0 +1,251 @@
package eu.dnetlib.dhp.collection.orcid;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.ximpleware.*;
import eu.dnetlib.dhp.collection.orcid.model.*;
import eu.dnetlib.dhp.parser.utility.VtdException;
import eu.dnetlib.dhp.parser.utility.VtdUtilityParser;
/**
 * Parses ORCID dump XML documents (record summaries, works and employments) into the
 * corresponding model beans, using VTD-XML for navigation.
 * <p>
 * Every {@code parse*} call replaces the navigator and autopilot kept in instance fields,
 * so a single instance must not be used by several threads at the same time.
 */
public class OrcidParser {

	private static final Logger log = LoggerFactory.getLogger(OrcidParser.class);

	/** Navigator over the document currently being parsed. */
	private VTDNav vn;

	/** Autopilot bound to {@link #vn}, with the ORCID namespaces declared. */
	private AutoPilot ap;

	private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common";
	private static final String NS_COMMON = "common";
	private static final String NS_PERSON_URL = "http://www.orcid.org/ns/person";
	private static final String NS_PERSON = "person";
	private static final String NS_DETAILS_URL = "http://www.orcid.org/ns/personal-details";
	private static final String NS_DETAILS = "personal-details";
	private static final String NS_OTHER_URL = "http://www.orcid.org/ns/other-name";
	private static final String NS_OTHER = "other-name";
	private static final String NS_RECORD_URL = "http://www.orcid.org/ns/record";
	private static final String NS_RECORD = "record";
	private static final String NS_ERROR_URL = "http://www.orcid.org/ns/error";
	private static final String NS_ACTIVITIES = "activities";
	private static final String NS_ACTIVITIES_URL = "http://www.orcid.org/ns/activities";
	private static final String NS_WORK = "work";
	private static final String NS_WORK_URL = "http://www.orcid.org/ns/work";
	private static final String NS_ERROR = "error";
	private static final String NS_HISTORY = "history";
	private static final String NS_HISTORY_URL = "http://www.orcid.org/ns/history";
	private static final String NS_BULK_URL = "http://www.orcid.org/ns/bulk";
	private static final String NS_BULK = "bulk";
	private static final String NS_EXTERNAL = "external-identifier";
	private static final String NS_EXTERNAL_URL = "http://www.orcid.org/ns/external-identifier";

	/**
	 * Parses the given XML and (re)initialises {@link #vn} and {@link #ap}, declaring the
	 * namespace prefixes used by the XPath expressions of this class.
	 *
	 * @param xml the XML document to parse
	 * @throws ParseException if the document is not well-formed
	 */
	private void generateParsedDocument(final String xml) throws ParseException {
		final VTDGen vg = new VTDGen();
		// Encode explicitly as UTF-8: the platform default charset would make the parsing of
		// non-ASCII content depend on the JVM configuration.
		vg.setDoc(xml.getBytes(StandardCharsets.UTF_8));
		vg.parse(true);
		this.vn = vg.getNav();
		this.ap = new AutoPilot(vn);
		ap.declareXPathNameSpace(NS_COMMON, NS_COMMON_URL);
		ap.declareXPathNameSpace(NS_PERSON, NS_PERSON_URL);
		ap.declareXPathNameSpace(NS_DETAILS, NS_DETAILS_URL);
		ap.declareXPathNameSpace(NS_OTHER, NS_OTHER_URL);
		ap.declareXPathNameSpace(NS_RECORD, NS_RECORD_URL);
		ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL);
		ap.declareXPathNameSpace(NS_HISTORY, NS_HISTORY_URL);
		ap.declareXPathNameSpace(NS_WORK, NS_WORK_URL);
		ap.declareXPathNameSpace(NS_EXTERNAL, NS_EXTERNAL_URL);
		ap.declareXPathNameSpace(NS_ACTIVITIES, NS_ACTIVITIES_URL);
	}

	/**
	 * Reads the {@code common:external-id-type} / {@code common:external-id-value} children of
	 * the element the main autopilot is currently positioned on.
	 *
	 * @return a Pid whose schema and value are set from the matching children (left unset when absent)
	 */
	private Pid readPid() throws XPathParseException, XPathEvalException, NavException {
		final Pid pid = new Pid();
		final AutoPilot ap1 = new AutoPilot(ap.getNav());
		ap1.selectXPath("./common:external-id-type");
		while (ap1.evalXPath() != -1) {
			pid.setSchema(vn.toNormalizedString(vn.getText()));
		}
		ap1.selectXPath("./common:external-id-value");
		while (ap1.evalXPath() != -1) {
			pid.setValue(vn.toNormalizedString(vn.getText()));
		}
		return pid;
	}

	/**
	 * Parses an ORCID record summary.
	 *
	 * @param xml the summary XML
	 * @return the parsed author, or {@code null} when the document has no {@code record:record}
	 *         element, has no {@code person:name} element, or cannot be parsed
	 */
	public Author parseSummary(final String xml) {
		try {
			final Author author = new Author();
			generateParsedDocument(xml);
			final List<VtdUtilityParser.Node> recordNodes = VtdUtilityParser
				.getTextValuesWithAttributes(
					ap, vn, "//record:record", Arrays.asList("path"));
			if (recordNodes.isEmpty()) {
				return null;
			}
			// the path attribute is "/<orcid>": drop the leading slash
			final String oid = (recordNodes.get(0).getAttributes().get("path")).substring(1);
			author.setOrcid(oid);

			final List<VtdUtilityParser.Node> personNodes = VtdUtilityParser
				.getTextValuesWithAttributes(
					ap, vn, "//person:name", Arrays.asList("visibility"));
			if (personNodes.isEmpty()) {
				// Same outcome as before (record discarded), but without relying on an
				// IndexOutOfBoundsException being caught below.
				log.warn("No person:name element found for ORCID {}", oid);
				return null;
			}
			final String visibility = (personNodes.get(0).getAttributes().get("visibility"));
			author.setVisibility(visibility);

			final String name = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:given-names");
			author.setGivenName(name);
			final String surnames = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:family-name");
			author.setFamilyName(surnames);
			final String creditNames = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:credit-name");
			author.setCreditName(creditNames);
			final String biography = VtdUtilityParser
				.getSingleValue(ap, vn, "//person:biography/personal-details:content");
			author.setBiography(biography);

			final List<String> otherNames = VtdUtilityParser.getTextValue(ap, vn, "//other-name:content");
			if (!otherNames.isEmpty()) {
				author.setOtherNames(otherNames);
			}

			ap.selectXPath("//external-identifier:external-identifier");
			while (ap.evalXPath() != -1) {
				author.addOtherPid(readPid());
			}
			return author;
		} catch (Exception e) {
			// pass the exception as last argument so that the stack trace is logged too
			log.error("Error on parsing {}", xml, e);
			return null;
		}
	}

	/**
	 * Parses an ORCID work.
	 *
	 * @param xml the work XML
	 * @return the parsed work, or {@code null} when the document has no {@code work:work}
	 *         element or cannot be parsed
	 */
	public Work parseWork(final String xml) {
		try {
			final Work work = new Work();
			generateParsedDocument(xml);
			final List<VtdUtilityParser.Node> workNodes = VtdUtilityParser
				.getTextValuesWithAttributes(ap, vn, "//work:work", Arrays.asList("path", "visibility"));
			if (workNodes.isEmpty()) {
				return null;
			}
			// the path attribute is "/<orcid>/work/<put-code>": the ORCID is the second token
			final String oid = (workNodes.get(0).getAttributes().get("path")).split("/")[1];
			work.setOrcid(oid);

			ap.selectXPath("//common:external-id");
			while (ap.evalXPath() != -1) {
				work.addPid(readPid());
			}
			work.setTitle(VtdUtilityParser.getSingleValue(ap, vn, "//work:title/common:title"));
			return work;
		} catch (Exception e) {
			log.error("Error on parsing {}", xml, e);
			return null;
		}
	}

	/**
	 * Builds a date string from the {@code common:year}, {@code common:month} and
	 * {@code common:day} children of the elements selected by the given XPath; month and day
	 * are each prefixed with a dash.
	 *
	 * @param xpath the XPath selecting the date element(s)
	 * @return the concatenated date parts, or an empty string when nothing matches
	 */
	private String extractEmploymentDate(final String xpath) throws Exception {
		ap.selectXPath(xpath);
		final StringBuilder sb = new StringBuilder();
		while (ap.evalXPath() != -1) {
			final AutoPilot ap1 = new AutoPilot(ap.getNav());
			ap1.selectXPath("./common:year");
			while (ap1.evalXPath() != -1) {
				sb.append(vn.toNormalizedString(vn.getText()));
			}
			ap1.selectXPath("./common:month");
			while (ap1.evalXPath() != -1) {
				sb.append("-");
				sb.append(vn.toNormalizedString(vn.getText()));
			}
			ap1.selectXPath("./common:day");
			while (ap1.evalXPath() != -1) {
				sb.append("-");
				sb.append(vn.toNormalizedString(vn.getText()));
			}
		}
		return sb.toString();
	}

	/**
	 * Parses an ORCID employment.
	 *
	 * @param xml the employment XML
	 * @return the parsed employment, or {@code null} when no source ORCID is found or the
	 *         document cannot be parsed
	 */
	public Employment parseEmployment(final String xml) {
		try {
			final Employment employment = new Employment();
			generateParsedDocument(xml);
			final String oid = VtdUtilityParser
				.getSingleValue(ap, vn, "//common:source-orcid/common:path");
			if (StringUtils.isBlank(oid)) {
				return null;
			}
			employment.setOrcid(oid);

			final String depName = VtdUtilityParser
				.getSingleValue(ap, vn, "//common:department-name");
			final String rolTitle = VtdUtilityParser
				.getSingleValue(ap, vn, "//common:role-title");
			if (StringUtils.isNotBlank(rolTitle)) {
				employment.setRoleTitle(rolTitle);
			}
			if (StringUtils.isNotBlank(depName)) {
				employment.setDepartmentName(depName);
			} else {
				// fall back to the organization name when no department is given
				employment
					.setDepartmentName(
						VtdUtilityParser
							.getSingleValue(ap, vn, "//common:organization/common:name"));
			}
			employment.setStartDate(extractEmploymentDate("//common:start-date"));
			employment.setEndDate(extractEmploymentDate("//common:end-date"));

			final String affiliationId = VtdUtilityParser
				.getSingleValue(ap, vn, "//common:disambiguated-organization-identifier");
			final String affiliationIdType = VtdUtilityParser
				.getSingleValue(ap, vn, "//common:disambiguation-source");
			if (StringUtils.isNotBlank(affiliationId) || StringUtils.isNotBlank(affiliationIdType)) {
				employment.setAffiliationId(new Pid(affiliationId, affiliationIdType));
			}
			return employment;
		} catch (Exception e) {
			log.error("Error on parsing {}", xml, e);
			return null;
		}
	}
}

View File

@ -0,0 +1,83 @@
package eu.dnetlib.dhp.collection.orcid.model;
import java.util.ArrayList;
import java.util.List;
/**
 * Bean describing the person an ORCID record belongs to: names, visibility, biography,
 * alternative names and the identifiers the person has in other systems.
 */
public class Author extends ORCIDItem {

	private String givenName;
	private String familyName;
	private String creditName;
	private String visibility;
	private String biography;
	private List<String> otherNames;
	private List<Pid> otherPids;

	public String getGivenName() {
		return this.givenName;
	}

	public void setGivenName(String givenName) {
		this.givenName = givenName;
	}

	public String getFamilyName() {
		return this.familyName;
	}

	public void setFamilyName(String familyName) {
		this.familyName = familyName;
	}

	public String getCreditName() {
		return this.creditName;
	}

	public void setCreditName(String creditName) {
		this.creditName = creditName;
	}

	public String getVisibility() {
		return this.visibility;
	}

	public void setVisibility(String visibility) {
		this.visibility = visibility;
	}

	public String getBiography() {
		return this.biography;
	}

	public void setBiography(String biography) {
		this.biography = biography;
	}

	public List<String> getOtherNames() {
		return this.otherNames;
	}

	public void setOtherNames(List<String> otherNames) {
		this.otherNames = otherNames;
	}

	public List<Pid> getOtherPids() {
		return this.otherPids;
	}

	public void setOtherPids(List<Pid> otherPids) {
		this.otherPids = otherPids;
	}

	/**
	 * Appends an identifier to the list of other pids, creating the list on first use.
	 *
	 * @param pid the identifier to add
	 */
	public void addOtherPid(final Pid pid) {
		if (this.otherPids == null) {
			this.otherPids = new ArrayList<>();
		}
		this.otherPids.add(pid);
	}
}

View File

@ -0,0 +1,54 @@
package eu.dnetlib.dhp.collection.orcid.model;
/**
 * Bean describing an employment listed in an ORCID record: the period, the role, the
 * department (or organization) name and the identifier of the affiliated organization.
 */
public class Employment extends ORCIDItem {

	private String startDate;

	// lower camel case like every other field; the bean property exposed through
	// getEndDate/setEndDate is unchanged
	private String endDate;

	private Pid affiliationId;

	private String departmentName;

	private String roleTitle;

	public String getStartDate() {
		return startDate;
	}

	public void setStartDate(String startDate) {
		this.startDate = startDate;
	}

	public String getEndDate() {
		return endDate;
	}

	public void setEndDate(String endDate) {
		this.endDate = endDate;
	}

	public Pid getAffiliationId() {
		return affiliationId;
	}

	public void setAffiliationId(Pid affiliationId) {
		this.affiliationId = affiliationId;
	}

	public String getDepartmentName() {
		return departmentName;
	}

	public void setDepartmentName(String departmentName) {
		this.departmentName = departmentName;
	}

	public String getRoleTitle() {
		return roleTitle;
	}

	public void setRoleTitle(String roleTitle) {
		this.roleTitle = roleTitle;
	}
}

View File

@ -0,0 +1,14 @@
package eu.dnetlib.dhp.collection.orcid.model;
/**
 * Base bean for the items extracted from the ORCID dump: it only carries the ORCID
 * identifier the item refers to.
 */
public class ORCIDItem {

	/** The ORCID identifier; {@code null} until set. */
	private String orcid;

	/**
	 * @return the ORCID identifier, or {@code null} when it has not been set
	 */
	public String getOrcid() {
		return this.orcid;
	}

	/**
	 * @param orcid the ORCID identifier to store
	 */
	public void setOrcid(String orcid) {
		this.orcid = orcid;
	}
}

View File

@ -0,0 +1,33 @@
package eu.dnetlib.dhp.collection.orcid.model;
/**
 * Bean holding a persistent identifier as a (value, schema) pair.
 */
public class Pid {

	private String value;
	private String schema;

	/** Creates an identifier with both value and schema unset. */
	public Pid() {
	}

	/**
	 * @param value  the identifier value
	 * @param schema the schema (type) the value belongs to
	 */
	public Pid(String value, String schema) {
		this.schema = schema;
		this.value = value;
	}

	public String getValue() {
		return this.value;
	}

	public void setValue(String value) {
		this.value = value;
	}

	public String getSchema() {
		return this.schema;
	}

	public void setSchema(String schema) {
		this.schema = schema;
	}
}

View File

@ -0,0 +1,35 @@
package eu.dnetlib.dhp.collection.orcid.model;
import java.util.ArrayList;
import java.util.List;
/**
 * Bean describing a work listed in an ORCID record: its title and its identifiers.
 */
public class Work extends ORCIDItem {

	private String title;
	private List<Pid> pids;

	public String getTitle() {
		return this.title;
	}

	public void setTitle(String title) {
		this.title = title;
	}

	public List<Pid> getPids() {
		return this.pids;
	}

	public void setPids(List<Pid> pids) {
		this.pids = pids;
	}

	/**
	 * Appends an identifier to the work, creating the list on first use.
	 *
	 * @param pid the identifier to add
	 */
	public void addPid(Pid pid) {
		if (this.pids == null) {
			this.pids = new ArrayList<>();
		}
		this.pids.add(pid);
	}
}

View File

@ -0,0 +1,21 @@
[
{
"paramName": "n",
"paramLongName": "namenode",
"paramDescription": "the Name Node URI",
"paramRequired": true
},
{
"paramName": "t",
"paramLongName": "targetPath",
"paramDescription": "the target PATH where the files are downloaded",
"paramRequired": true
},
{
"paramName": "a",
"paramLongName": "apiURL",
"paramDescription": "the FIGSHARE API id URL to retrieve all the dump files",
"paramRequired": true
}
]

View File

@ -0,0 +1,21 @@
[
{
"paramName": "n",
"paramLongName": "namenode",
"paramDescription": "the Name Node URI",
"paramRequired": true
},
{
"paramName": "t",
"paramLongName": "targetPath",
"paramDescription": "the target PATH to extract files",
"paramRequired": true
},
{
"paramName": "s",
"paramLongName": "sourcePath",
"paramDescription": "the PATH where the tar.gz files were downloaded",
"paramRequired": true
}
]

View File

@ -0,0 +1,21 @@
[
{
"paramName": "m",
"paramLongName": "master",
"paramDescription": "the master name",
"paramRequired": true
},
{
"paramName": "t",
"paramLongName": "targetPath",
"paramDescription": "the target PATH of the DF tables",
"paramRequired": true
},
{
"paramName": "s",
"paramLongName": "sourcePath",
"paramDescription": "the PATH of the ORCID sequence file",
"paramRequired": true
}
]

View File

@ -0,0 +1,23 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

View File

@ -0,0 +1,81 @@
<!--
    Downloads the ORCID dump, extracts the archives into sequence files and generates the
    ORCID tables. The three actions run in sequence:
    DownloadDUMP -> extractDump -> generateTables.
-->
<workflow-app name="download_ORCID_DUMP" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>targetPath</name>
            <description>the path to store the original ORCID dump</description>
        </property>
        <property>
            <name>apiURL</name>
            <description>The figshare API URL to retrieve the list file to download</description>
        </property>
    </parameters>

    <!-- Start from the first step: starting at generateTables left the download and
         extraction actions unreachable. -->
    <start to="DownloadDUMP"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <!-- Step 1: download the dump files listed by the API into ${targetPath} -->
    <action name="DownloadDUMP">
        <java>
            <configuration>
                <property>
                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
                    <value>true</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.dhp.collection.orcid.DownloadORCIDDumpApplication</main-class>
            <arg>--namenode</arg><arg>${nameNode}</arg>
            <arg>--targetPath</arg><arg>${targetPath}</arg>
            <arg>--apiURL</arg><arg>${apiURL}</arg>
        </java>
        <ok to="extractDump"/>
        <error to="Kill"/>
    </action>

    <!-- Step 2: extract the downloaded archives into ${targetPath}/extracted -->
    <action name="extractDump">
        <java>
            <configuration>
                <property>
                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
                    <value>true</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.dhp.collection.orcid.ExtractORCIDDump</main-class>
            <java-opts> -Xmx6g </java-opts>
            <arg>--namenode</arg><arg>${nameNode}</arg>
            <arg>--sourcePath</arg><arg>${targetPath}</arg>
            <arg>--targetPath</arg><arg>${targetPath}/extracted</arg>
        </java>
        <ok to="generateTables"/>
        <error to="Kill"/>
    </action>

    <!-- Step 3: parse the extracted records and write the tables under ${targetPath}/tables -->
    <action name="generateTables">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Generate ORCID Tables</name>
            <class>eu.dnetlib.dhp.collection.orcid.SparkGenerateORCIDTable</class>
            <jar>dhp-aggregation-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.executor.memoryOverhead=2g
                --conf spark.sql.shuffle.partitions=3000
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${targetPath}/extracted</arg>
            <arg>--targetPath</arg><arg>${targetPath}/tables</arg>
            <arg>--master</arg><arg>yarn</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

    <end name="End"/>
</workflow-app>

View File

@ -0,0 +1,21 @@
[
{
"paramName": "n",
"paramLongName": "namenode",
"paramDescription": "the Name Node URI",
"paramRequired": true
},
{
"paramName": "t",
"paramLongName": "targetPath",
"paramDescription": "the target PATH where the files are downloaded",
"paramRequired": true
},
{
"paramName": "a",
"paramLongName": "apiURL",
"paramDescription": "the FIGSHARE API id URL to retrieve all the dump files",
"paramRequired": true
}
]

View File

@ -0,0 +1,101 @@
package eu.dnetlib.dhp.collection.orcid
import eu.dnetlib.dhp.application.AbstractScalaApplication
import eu.dnetlib.dhp.collection.orcid.model.{Author, Employment, Pid, Work}
import org.apache.hadoop.io.Text
import org.apache.spark.SparkContext
import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
import org.slf4j.{Logger, LoggerFactory}
class SparkGenerateORCIDTable(propertyPath: String, args: Array[String], log: Logger)
    extends AbstractScalaApplication(propertyPath, args, log: Logger) {

  /** Reads the sourcePath and targetPath parameters and generates, in this order,
    * the Authors, Employments and Works tables.
    */
  override def run(): Unit = {
    val inputPath: String = parser.get("sourcePath")
    log.info("found parameters sourcePath: {}", inputPath)
    val outputPath: String = parser.get("targetPath")
    log.info("found parameters targetPath: {}", outputPath)
    extractORCIDTable(spark, inputPath, outputPath)
    extractORCIDEmploymentsTable(spark, inputPath, outputPath)
    extractORCIDWorksTable(spark, inputPath, outputPath)
  }

  /** Loads the (entry name, xml) pairs stored in the sequence file at sourcePath. */
  private def loadEntries(
    spark: SparkSession,
    sourcePath: String
  ): org.apache.spark.sql.Dataset[(String, String)] = {
    import spark.implicits._
    spark.sparkContext
      .sequenceFile(sourcePath, classOf[Text], classOf[Text])
      .map { case (name, content) => (name.toString, content.toString) }
      .toDF
      .as[(String, String)]
  }

  /** Parses the entries whose name contains "summaries" and overwrites targetPath/Authors
    * with the resulting authors; entries the parser returns null for are dropped.
    */
  def extractORCIDTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
    import spark.implicits._
    val entries = loadEntries(spark, sourcePath)
    implicit val authorEncoder: Encoder[Author] = Encoders.bean(classOf[Author])
    val authors = entries
      .filter(entry => entry._1.contains("summaries"))
      .map(entry => new OrcidParser().parseSummary(entry._2))
      .filter(author => author != null)
    authors.write
      .mode(SaveMode.Overwrite)
      .save(s"$targetPath/Authors")
  }

  /** Parses the entries whose name contains "works" and overwrites targetPath/Works
    * with the resulting works; entries the parser returns null for are dropped.
    */
  def extractORCIDWorksTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
    import spark.implicits._
    val entries = loadEntries(spark, sourcePath)
    implicit val workEncoder: Encoder[Work] = Encoders.bean(classOf[Work])
    implicit val pidEncoder: Encoder[Pid] = Encoders.bean(classOf[Pid])
    val works = entries
      .filter(entry => entry._1.contains("works"))
      .map(entry => new OrcidParser().parseWork(entry._2))
      .filter(work => work != null)
    works.write
      .mode(SaveMode.Overwrite)
      .save(s"$targetPath/Works")
  }

  /** Parses the entries whose name contains "employments" and overwrites
    * targetPath/Employments with the resulting employments; entries the parser returns
    * null for are dropped.
    */
  def extractORCIDEmploymentsTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
    import spark.implicits._
    val entries = loadEntries(spark, sourcePath)
    implicit val employmentEncoder: Encoder[Employment] = Encoders.bean(classOf[Employment])
    implicit val pidEncoder: Encoder[Pid] = Encoders.bean(classOf[Pid])
    val employments = entries
      .filter(entry => entry._1.contains("employments"))
      .map(entry => new OrcidParser().parseEmployment(entry._2))
      .filter(employment => employment != null)
    employments.write
      .mode(SaveMode.Overwrite)
      .save(s"$targetPath/Employments")
  }
}
object SparkGenerateORCIDTable {

  val log: Logger = LoggerFactory.getLogger(SparkGenerateORCIDTable.getClass)

  /** Builds the application from its parameter definition file and the command line
    * arguments, initializes it and runs it.
    */
  def main(args: Array[String]): Unit = {
    val parametersResource = "/eu/dnetlib/dhp/collection/orcid/generate_orcid_table_parameter.json"
    val application = new SparkGenerateORCIDTable(parametersResource, args, log)
    application.initialize().run()
  }
}

View File

@ -166,7 +166,7 @@ object DataciteToOAFTransformation {
resourceTypeGeneral: String, resourceTypeGeneral: String,
schemaOrg: String, schemaOrg: String,
vocabularies: VocabularyGroup vocabularies: VocabularyGroup
): (Qualifier, Qualifier) = { ): (Qualifier, Qualifier, String) = {
if (resourceType != null && resourceType.nonEmpty) { if (resourceType != null && resourceType.nonEmpty) {
val typeQualifier = val typeQualifier =
vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType) vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType)
@ -176,7 +176,8 @@ object DataciteToOAFTransformation {
vocabularies.getSynonymAsQualifier( vocabularies.getSynonymAsQualifier(
ModelConstants.DNET_RESULT_TYPOLOGIES, ModelConstants.DNET_RESULT_TYPOLOGIES,
typeQualifier.getClassid typeQualifier.getClassid
) ),
resourceType
) )
} }
if (schemaOrg != null && schemaOrg.nonEmpty) { if (schemaOrg != null && schemaOrg.nonEmpty) {
@ -188,7 +189,8 @@ object DataciteToOAFTransformation {
vocabularies.getSynonymAsQualifier( vocabularies.getSynonymAsQualifier(
ModelConstants.DNET_RESULT_TYPOLOGIES, ModelConstants.DNET_RESULT_TYPOLOGIES,
typeQualifier.getClassid typeQualifier.getClassid
) ),
schemaOrg
) )
} }
@ -203,7 +205,8 @@ object DataciteToOAFTransformation {
vocabularies.getSynonymAsQualifier( vocabularies.getSynonymAsQualifier(
ModelConstants.DNET_RESULT_TYPOLOGIES, ModelConstants.DNET_RESULT_TYPOLOGIES,
typeQualifier.getClassid typeQualifier.getClassid
) ),
resourceTypeGeneral
) )
} }
@ -216,12 +219,18 @@ object DataciteToOAFTransformation {
schemaOrg: String, schemaOrg: String,
vocabularies: VocabularyGroup vocabularies: VocabularyGroup
): Result = { ): Result = {
val typeQualifiers: (Qualifier, Qualifier) = val typeQualifiers: (Qualifier, Qualifier, String) =
getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies) getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies)
if (typeQualifiers == null) if (typeQualifiers == null)
return null return null
val i = new Instance val i = new Instance
i.setInstancetype(typeQualifiers._1) i.setInstancetype(typeQualifiers._1)
// ADD ORIGINAL TYPE
val itm = new InstanceTypeMapping
itm.setOriginalType(typeQualifiers._3)
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
i.setInstanceTypeMapping(List(itm).asJava)
typeQualifiers._2.getClassname match { typeQualifiers._2.getClassname match {
case "dataset" => case "dataset" =>
val r = new OafDataset val r = new OafDataset

View File

@ -176,7 +176,7 @@ object BioDBToOAF {
i.setUrl(List(s"${resolvedURL(input.pidType)}${input.pid}").asJava) i.setUrl(List(s"${resolvedURL(input.pidType)}${input.pid}").asJava)
} }
if (input.pidType.equalsIgnoreCase("clinicaltrials.gov")) if (input.pidType.equalsIgnoreCase("clinicaltrials.gov")) {
i.setInstancetype( i.setInstancetype(
OafMapperUtils.qualifier( OafMapperUtils.qualifier(
"0037", "0037",
@ -185,7 +185,11 @@ object BioDBToOAF {
ModelConstants.DNET_PUBLICATION_RESOURCE ModelConstants.DNET_PUBLICATION_RESOURCE
) )
) )
else val itm = new InstanceTypeMapping
itm.setOriginalType(input.pidType)
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
i.setInstanceTypeMapping(List(itm).asJava)
} else {
i.setInstancetype( i.setInstancetype(
OafMapperUtils.qualifier( OafMapperUtils.qualifier(
"0046", "0046",
@ -194,6 +198,11 @@ object BioDBToOAF {
ModelConstants.DNET_PUBLICATION_RESOURCE ModelConstants.DNET_PUBLICATION_RESOURCE
) )
) )
val itm = new InstanceTypeMapping
itm.setOriginalType("Bioentity")
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
i.setInstanceTypeMapping(List(itm).asJava)
}
if (input.datasource == null || input.datasource.isEmpty) if (input.datasource == null || input.datasource.isEmpty)
return null return null
@ -265,6 +274,10 @@ object BioDBToOAF {
ModelConstants.DNET_PUBLICATION_RESOURCE ModelConstants.DNET_PUBLICATION_RESOURCE
) )
) )
val itm = new InstanceTypeMapping
itm.setOriginalType("Bioentity")
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
i.setInstanceTypeMapping(List(itm).asJava)
i.setCollectedfrom(collectedFromMap("uniprot")) i.setCollectedfrom(collectedFromMap("uniprot"))
d.setInstance(List(i).asJava) d.setInstance(List(i).asJava)
@ -471,6 +484,10 @@ object BioDBToOAF {
ModelConstants.DNET_PUBLICATION_RESOURCE ModelConstants.DNET_PUBLICATION_RESOURCE
) )
) )
val itm = new InstanceTypeMapping
itm.setOriginalType("Bioentity")
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
i.setInstanceTypeMapping(List(itm).asJava)
i.setCollectedfrom(collectedFromMap("pdb")) i.setCollectedfrom(collectedFromMap("pdb"))
d.setInstance(List(i).asJava) d.setInstance(List(i).asJava)
@ -571,6 +588,10 @@ object BioDBToOAF {
ModelConstants.DNET_PUBLICATION_RESOURCE ModelConstants.DNET_PUBLICATION_RESOURCE
) )
) )
val itm = new InstanceTypeMapping
itm.setOriginalType("Bioentity")
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
i.setInstanceTypeMapping(List(itm).asJava)
i.setCollectedfrom(collectedFromMap("ebi")) i.setCollectedfrom(collectedFromMap("ebi"))
d.setInstance(List(i).asJava) d.setInstance(List(i).asJava)

View File

@ -188,13 +188,24 @@ object PubMedToOaf {
val cojbCategory = val cojbCategory =
getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, ja.get.getValue) getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, ja.get.getValue)
pubmedInstance.setInstancetype(cojbCategory) pubmedInstance.setInstancetype(cojbCategory)
// ADD ORIGINAL TYPE to the publication
val itm = new InstanceTypeMapping
itm.setOriginalType(ja.get.getValue)
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
pubmedInstance.setInstanceTypeMapping(List(itm).asJava)
} else { } else {
val i_type = article.getPublicationTypes.asScala val i_type = article.getPublicationTypes.asScala
.map(s => getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)) .map(s => (s.getValue, getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)))
.find(q => q != null) .find(q => q._2 != null)
if (i_type.isDefined)
pubmedInstance.setInstancetype(i_type.get) if (i_type.isDefined) {
else pubmedInstance.setInstancetype(i_type.get._2)
// ADD ORIGINAL TYPE to the publication
val itm = new InstanceTypeMapping
itm.setOriginalType(i_type.get._1)
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
pubmedInstance.setInstanceTypeMapping(List(itm).asJava)
} else
return null return null
} }
val result = createResult(pubmedInstance.getInstancetype, vocabularies) val result = createResult(pubmedInstance.getInstancetype, vocabularies)

View File

@ -0,0 +1,119 @@
package eu.dnetlib.dhp.collection.orcid;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.Text;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.ximpleware.NavException;
import com.ximpleware.ParseException;
import com.ximpleware.XPathEvalException;
import com.ximpleware.XPathParseException;
import eu.dnetlib.dhp.collection.orcid.model.Author;
import eu.dnetlib.dhp.collection.orcid.model.ORCIDItem;
import eu.dnetlib.dhp.parser.utility.VtdException;
/**
 * Smoke tests for {@link OrcidParser}: each test parses the XML fixtures found on the
 * classpath and prints the JSON serialization of the parsed item.
 */
public class DownloadORCIDTest {
	private final Logger log = LoggerFactory.getLogger(DownloadORCIDTest.class);

	/**
	 * Reads a classpath resource as a UTF-8 string, closing the stream afterwards.
	 *
	 * @param path the absolute classpath location of the resource
	 * @return the resource content
	 * @throws IOException if the resource cannot be read
	 * @throws NullPointerException if the resource does not exist
	 */
	private String readResource(final String path) throws IOException {
		try (java.io.InputStream in = Objects
			.requireNonNull(getClass().getResourceAsStream(path), "missing test resource: " + path)) {
			// explicit charset: the charset-less IOUtils.toString(InputStream) is deprecated
			// and depends on the platform default
			return IOUtils.toString(in, java.nio.charset.StandardCharsets.UTF_8);
		}
	}

	@Test
	public void testSummary() throws Exception {
		final String xml = readResource("/eu/dnetlib/dhp/collection/orcid/summary.xml");
		final OrcidParser parser = new OrcidParser();
		final ORCIDItem orcidItem = parser.parseSummary(xml);
		final ObjectMapper mapper = new ObjectMapper();
		System.out.println(mapper.writeValueAsString(orcidItem));
	}

	@Test
	public void testParsingWork() throws Exception {
		final List<String> workPaths = Arrays
			.asList(
				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-2536-4498.xml",
				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-5982-8983.xml",
				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191.xml",
				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191-similarity.xml",
				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191_contributors.xml");
		final OrcidParser parser = new OrcidParser();
		final ObjectMapper mapper = new ObjectMapper();
		for (final String path : workPaths) {
			System.out.println(mapper.writeValueAsString(parser.parseWork(readResource(path))));
		}
	}

	@Test
	public void testParsingEmployments() throws Exception {
		final List<String> employmentPaths = Arrays
			.asList(
				"/eu/dnetlib/dhp/collection/orcid/employment.xml",
				"/eu/dnetlib/dhp/collection/orcid/employment_2.xml",
				"/eu/dnetlib/dhp/collection/orcid/employment_3.xml");
		final OrcidParser parser = new OrcidParser();
		final ObjectMapper mapper = new ObjectMapper();
		for (final String path : employmentPaths) {
			System.out.println(mapper.writeValueAsString(parser.parseEmployment(readResource(path))));
		}
	}
}

View File

@ -0,0 +1,69 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<work:work xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history" xmlns:employment="http://www.orcid.org/ns/employment" xmlns:education="http://www.orcid.org/ns/education" xmlns:other-name="http://www.orcid.org/ns/other-name" xmlns:deprecated="http://www.orcid.org/ns/deprecated" xmlns:funding="http://www.orcid.org/ns/funding" xmlns:research-resource="http://www.orcid.org/ns/research-resource" xmlns:service="http://www.orcid.org/ns/service" xmlns:researcher-url="http://www.orcid.org/ns/researcher-url" xmlns:distinction="http://www.orcid.org/ns/distinction" xmlns:internal="http://www.orcid.org/ns/internal" xmlns:membership="http://www.orcid.org/ns/membership" xmlns:person="http://www.orcid.org/ns/person" xmlns:personal-details="http://www.orcid.org/ns/personal-details" xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common" xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword" xmlns:activities="http://www.orcid.org/ns/activities" xmlns:qualification="http://www.orcid.org/ns/qualification" xmlns:external-identifier="http://www.orcid.org/ns/external-identifier" xmlns:error="http://www.orcid.org/ns/error" xmlns:preferences="http://www.orcid.org/ns/preferences" xmlns:invited-position="http://www.orcid.org/ns/invited-position" xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="26448226" path="/0000-0001-5010-5001/work/26448226" visibility="public">
<common:created-date>2016-09-01T19:22:46.768Z</common:created-date>
<common:last-modified-date>2022-05-25T03:48:56.968Z</common:last-modified-date>
<common:source>
<common:source-client-id>
<common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
<common:path>0000-0002-5982-8983</common:path>
<common:host>orcid.org</common:host>
</common:source-client-id>
<common:source-name>Scopus - Elsevier</common:source-name>
<common:assertion-origin-orcid>
<common:uri>https://orcid.org/0000-0001-5010-5001</common:uri>
<common:path>0000-0001-5010-5001</common:path>
<common:host>orcid.org</common:host>
</common:assertion-origin-orcid>
<common:assertion-origin-name>Quang Nguyen</common:assertion-origin-name>
</common:source>
<work:title>
<common:title>Vision outcomes and major complications after endovascular coil embolization of ophthalmic segment aneurysms</common:title>
</work:title>
<work:journal-title>American Journal of Neuroradiology</work:journal-title>
<work:citation>
<work:citation-type>bibtex</work:citation-type>
<work:citation-value>@article{Nguyen2014,title = {Vision outcomes and major complications after endovascular coil embolization of ophthalmic segment aneurysms},journal = {American Journal of Neuroradiology},year = {2014},volume = {35},number = {11},pages = {2140-2145},author = {Durst, C. and Starke, R.M. and Gaughen, J. and Nguyen, Q. and Patrie, J. and Jensen, M.E. and Evans, A.J.}}</work:citation-value>
</work:citation>
<work:type>journal-article</work:type>
<common:publication-date>
<common:year>2014</common:year>
</common:publication-date>
<common:external-ids>
<common:external-id>
<common:external-id-type>doi</common:external-id-type>
<common:external-id-value>10.3174/ajnr.A4032</common:external-id-value>
<common:external-id-normalized transient="true">10.3174/ajnr.a4032</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
<common:external-id>
<common:external-id-type>eid</common:external-id-type>
<common:external-id-value>2-s2.0-84911865199</common:external-id-value>
<common:external-id-normalized transient="true">2-s2.0-84911865199</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<common:url>http://www.scopus.com/inward/record.url?eid=2-s2.0-84911865199&amp;partnerID=MN8TOARS</common:url>
<work:contributors>
<work:contributor>
<work:credit-name>Durst, C.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Starke, R.M.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Gaughen, J.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Nguyen, Q.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Patrie, J.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Jensen, M.E.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Evans, A.J.</work:credit-name>
</work:contributor>
</work:contributors>
</work:work>

View File

@ -0,0 +1,79 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<work:work xmlns:address="http://www.orcid.org/ns/address"
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work"
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="50101152"
path="/0000-0001-5349-4030/work/50101152" visibility="public">
<common:created-date>2018-11-01T19:49:45.562Z</common:created-date>
<common:last-modified-date>2018-11-01T19:49:45.562Z</common:last-modified-date>
<common:source>
<common:source-client-id>
<common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
<common:path>0000-0002-5982-8983</common:path>
<common:host>orcid.org</common:host>
</common:source-client-id>
<common:source-name>Scopus - Elsevier</common:source-name>
</common:source>
<work:title>
<common:title>"Calling Out" in class: Degrees of candor in addressing social injustices in
racially homogenous and heterogeneous U.S. history classrooms</common:title>
</work:title>
<work:journal-title>Journal of Social Studies Research</work:journal-title>
<work:citation>
<work:citation-type>bibtex</work:citation-type>
<work:citation-value>@article{Massaro2018,title = {{"}Calling Out{"} in class: Degrees of
candor in addressing social injustices in racially homogenous and heterogeneous U.S.
history classrooms},journal = {Journal of Social Studies Research},year = {2018},author
= {Parkhouse, H. and Massaro, V.R.}}</work:citation-value>
</work:citation>
<work:type>journal-article</work:type>
<common:publication-date>
<common:year>2018</common:year>
</common:publication-date>
<common:external-ids>
<common:external-id>
<common:external-id-type>doi</common:external-id-type>
<common:external-id-value>10.1016/j.jssr.2018.01.004</common:external-id-value>
<common:external-id-normalized transient="true"
>10.1016/j.jssr.2018.01.004</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
<common:external-id>
<common:external-id-type>eid</common:external-id-type>
<common:external-id-value>2-s2.0-85041949043</common:external-id-value>
<common:external-id-normalized transient="true"
>2-s2.0-85041949043</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<common:url>http://www.scopus.com/inward/record.url?eid=2-s2.0-85041949043&amp;partnerID=MN8TOARS</common:url>
<work:contributors>
<work:contributor>
<work:credit-name>Parkhouse, H.</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>Massaro, V.R.</work:credit-name>
</work:contributor>
</work:contributors>
</work:work>

View File

@ -0,0 +1,113 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<work:work xmlns:address="http://www.orcid.org/ns/address"
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work"
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
path="/0000-0003-2760-1191/work/28776099" visibility="public">
<common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
<common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
<common:path>0000-0002-9157-3431</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Europe PubMed Central</common:source-name>
</common:source>
<work:title>
<common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
ST-Segment-Elevation Myocardial Infarction.</common:title>
</work:title>
<work:citation>
<work:citation-type>formatted-unspecified</work:citation-type>
<work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
</work:citation>
<work:type>journal-article</work:type>
<common:publication-date>
<common:year>2016</common:year>
<common:month>11</common:month>
</common:publication-date>
<common:external-ids>
<common:external-id>
<common:external-id-type>pmid</common:external-id-type>
<common:external-id-value>27899851</common:external-id-value>
<common:external-id-normalized transient="true">27899851</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
<common:external-id>
<common:external-id-type>pmc</common:external-id-type>
<common:external-id-value>PMC5126442</common:external-id-value>
<common:external-id-normalized transient="true"
>PMC5126442</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<common:url>http://europepmc.org/abstract/med/27899851</common:url>
<work:contributors>
<work:contributor>
<work:credit-name>Abdel-Dayem K</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Abdel-Dayem Fake</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Eweda II</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>El-Sherbiny A</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Dimitry MO</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Nammas W</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
</work:contributors>
</work:work>

View File

@ -0,0 +1,106 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<work:work xmlns:address="http://www.orcid.org/ns/address"
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work"
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
path="/0000-0003-2760-1191/work/28776099" visibility="public">
<common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
<common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
<common:path>0000-0002-9157-3431</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Europe PubMed Central</common:source-name>
</common:source>
<work:title>
<common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
ST-Segment-Elevation Myocardial Infarction.</common:title>
</work:title>
<work:citation>
<work:citation-type>formatted-unspecified</work:citation-type>
<work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
</work:citation>
<work:type>journal-article</work:type>
<common:publication-date>
<common:year>2016</common:year>
<common:month>11</common:month>
</common:publication-date>
<common:external-ids>
<common:external-id>
<common:external-id-type>pmid</common:external-id-type>
<common:external-id-value>27899851</common:external-id-value>
<common:external-id-normalized transient="true">27899851</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
<common:external-id>
<common:external-id-type>pmc</common:external-id-type>
<common:external-id-value>PMC5126442</common:external-id-value>
<common:external-id-normalized transient="true"
>PMC5126442</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<common:url>http://europepmc.org/abstract/med/27899851</common:url>
<work:contributors>
<work:contributor>
<work:credit-name>Khair Abde Daye</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Eweda II</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>El-Sherbiny A</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Dimitry MO</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>Nammas W</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>first</work:contributor-sequence>
<work:contributor-role>author</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
</work:contributors>
</work:work>

View File

@ -0,0 +1,101 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<work:work xmlns:address="http://www.orcid.org/ns/address"
xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work"
xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
path="/0000-0003-2760-1191/work/28776099" visibility="public">
<common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
<common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
<common:path>0000-0002-9157-3431</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Europe PubMed Central</common:source-name>
</common:source>
<work:title>
<common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
ST-Segment-Elevation Myocardial Infarction.</common:title>
</work:title>
<work:citation>
<work:citation-type>formatted-unspecified</work:citation-type>
<work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
</work:citation>
<work:type>journal-article</work:type>
<common:publication-date>
<common:year>2016</common:year>
<common:month>11</common:month>
</common:publication-date>
<common:external-ids>
<common:external-id>
<common:external-id-type>pmid</common:external-id-type>
<common:external-id-value>27899851</common:external-id-value>
<common:external-id-normalized transient="true">27899851</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
<common:external-id>
<common:external-id-type>pmc</common:external-id-type>
<common:external-id-value>PMC5126442</common:external-id-value>
<common:external-id-normalized transient="true"
>PMC5126442</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<common:url>http://europepmc.org/abstract/med/27899851</common:url>
<work:contributors>
<work:contributor>
<work:contributor-attributes>
<work:contributor-sequence>seq0</work:contributor-sequence>
<work:contributor-role>role0</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>creditname1</work:credit-name>
</work:contributor>
<work:contributor>
<work:credit-name>creditname2</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>seq2</work:contributor-sequence>
<work:contributor-role></work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name>creditname3</work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence></work:contributor-sequence>
<work:contributor-role>role3</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
<work:contributor>
<work:credit-name></work:credit-name>
<work:contributor-attributes>
<work:contributor-sequence>seq4</work:contributor-sequence>
<work:contributor-role>role4</work:contributor-role>
</work:contributor-attributes>
</work:contributor>
</work:contributors>
</work:work>

View File

@ -0,0 +1,50 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
put-code="2205087" path="/0000-0001-5010-5001/employment/2205087" display-index="0"
visibility="public">
<common:created-date>2016-09-01T19:21:05.791Z</common:created-date>
<common:last-modified-date>2016-09-01T19:21:05.791Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5010-5001</common:uri>
<common:path>0000-0001-5010-5001</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Quang Nguyen</common:source-name>
</common:source>
<common:organization>
<common:name>Beth Israel Deaconess Medical Center</common:name>
<common:address>
<common:city>Boston</common:city>
<common:region>MA</common:region>
<common:country>US</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>1859</common:disambiguated-organization-identifier>
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</employment:employment>

View File

@ -0,0 +1,55 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
put-code="6364960" path="/0000-0001-5011-3001/employment/6364960" display-index="1"
visibility="public">
<common:created-date>2018-09-03T01:46:19.474Z</common:created-date>
<common:last-modified-date>2018-09-03T01:46:19.474Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5011-3001</common:uri>
<common:path>0000-0001-5011-3001</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>zhengyan li</common:source-name>
</common:source>
<common:start-date>
<common:year>2008</common:year>
<common:month>09</common:month>
<common:day>01</common:day>
</common:start-date>
<common:organization>
<common:name>Anhui Academy of Agricultural Sciences</common:name>
<common:address>
<common:city>Hefei</common:city>
<common:region>Anhui</common:region>
<common:country>CN</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>125385</common:disambiguated-organization-identifier>
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</employment:employment>

View File

@ -0,0 +1,62 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
xmlns:history="http://www.orcid.org/ns/history"
xmlns:employment="http://www.orcid.org/ns/employment"
xmlns:education="http://www.orcid.org/ns/education"
xmlns:other-name="http://www.orcid.org/ns/other-name"
xmlns:deprecated="http://www.orcid.org/ns/deprecated"
xmlns:funding="http://www.orcid.org/ns/funding"
xmlns:research-resource="http://www.orcid.org/ns/research-resource"
xmlns:service="http://www.orcid.org/ns/service"
xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
xmlns:distinction="http://www.orcid.org/ns/distinction"
xmlns:internal="http://www.orcid.org/ns/internal"
xmlns:membership="http://www.orcid.org/ns/membership"
xmlns:person="http://www.orcid.org/ns/person"
xmlns:personal-details="http://www.orcid.org/ns/personal-details"
xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
xmlns:activities="http://www.orcid.org/ns/activities"
xmlns:qualification="http://www.orcid.org/ns/qualification"
xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
xmlns:error="http://www.orcid.org/ns/error"
xmlns:preferences="http://www.orcid.org/ns/preferences"
xmlns:invited-position="http://www.orcid.org/ns/invited-position"
xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
put-code="7210424" path="/0000-0001-5022-8001/employment/7210424" display-index="1"
visibility="public">
<common:created-date>2021-03-11T14:48:29.603Z</common:created-date>
<common:last-modified-date>2021-03-11T14:48:29.603Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5012-1001</common:uri>
<common:path>0000-0001-5012-1001</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Asma Bazzi</common:source-name>
</common:source>
<common:department-name>Pathology and Laboratory Medicine</common:department-name>
<common:role-title>Medical Laboratory Technologist</common:role-title>
<common:start-date>
<common:year>1994</common:year>
<common:month>10</common:month>
<common:day>01</common:day>
</common:start-date>
<common:end-date>
<common:year>2000</common:year>
<common:month>06</common:month>
<common:day>30</common:day>
</common:end-date>
<common:organization>
<common:name>American University of Beirut</common:name>
<common:address>
<common:city>Hamra</common:city>
<common:region>Beirut</common:region>
<common:country>LB</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>11238</common:disambiguated-organization-identifier>
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</employment:employment>

View File

@ -0,0 +1,581 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<record:record xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history" xmlns:employment="http://www.orcid.org/ns/employment" xmlns:education="http://www.orcid.org/ns/education" xmlns:other-name="http://www.orcid.org/ns/other-name" xmlns:deprecated="http://www.orcid.org/ns/deprecated" xmlns:funding="http://www.orcid.org/ns/funding" xmlns:research-resource="http://www.orcid.org/ns/research-resource" xmlns:service="http://www.orcid.org/ns/service" xmlns:researcher-url="http://www.orcid.org/ns/researcher-url" xmlns:distinction="http://www.orcid.org/ns/distinction" xmlns:internal="http://www.orcid.org/ns/internal" xmlns:membership="http://www.orcid.org/ns/membership" xmlns:person="http://www.orcid.org/ns/person" xmlns:personal-details="http://www.orcid.org/ns/personal-details" xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common" xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword" xmlns:activities="http://www.orcid.org/ns/activities" xmlns:qualification="http://www.orcid.org/ns/qualification" xmlns:external-identifier="http://www.orcid.org/ns/external-identifier" xmlns:error="http://www.orcid.org/ns/error" xmlns:preferences="http://www.orcid.org/ns/preferences" xmlns:invited-position="http://www.orcid.org/ns/invited-position" xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review" path="/0000-0001-5045-1000">
<common:orcid-identifier>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:orcid-identifier>
<preferences:preferences>
<preferences:locale>es</preferences:locale>
</preferences:preferences>
<history:history>
<history:creation-method>Direct</history:creation-method>
<history:submission-date>2023-01-17T23:50:40.215Z</history:submission-date>
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
<history:claimed>true</history:claimed>
<history:verified-email>true</history:verified-email>
<history:verified-primary-email>true</history:verified-primary-email>
</history:history>
<person:person path="/0000-0001-5045-1000/person">
<person:name visibility="public" path="0000-0001-5045-1000">
<common:created-date>2023-01-17T23:50:40.472Z</common:created-date>
<common:last-modified-date>2023-01-17T23:50:40.472Z</common:last-modified-date>
<personal-details:given-names>Patricio</personal-details:given-names>
<personal-details:family-name>Sánchez Quinchuela</personal-details:family-name>
</person:name>
<other-name:other-names path="/0000-0001-5045-1000/other-names"/>
<person:biography visibility="public" path="/0000-0001-5045-1000/biography">
<common:created-date>2023-01-19T13:47:33.653Z</common:created-date>
<common:last-modified-date>2023-01-19T13:47:33.653Z</common:last-modified-date>
<personal-details:content>Especialista de vinculación con la sociedad y docente de la Universidad de las Artes. Magister en Economía Social y Solidaria por el IAEN; Magister en Proyectos Sociales y Productivos por la UNACH. Licenciado en Artes UCE. Licenciado en Castellano y Literatura por la UNACH. Doctorando del programa de Sociología de la UNED España. Larga trayectoria vinculado a las organizaciones sociales acompañando procesos de gestión cultural, formación de liderazgos y economía solidaria.</personal-details:content>
</person:biography>
<researcher-url:researcher-urls path="/0000-0001-5045-1000/researcher-urls"/>
<email:emails path="/0000-0001-5045-1000/email"/>
<address:addresses path="/0000-0001-5045-1000/address"/>
<keyword:keywords path="/0000-0001-5045-1000/keywords"/>
<external-identifier:external-identifiers path="/0000-0001-7291-3210/external-identifiers">
<common:last-modified-date>2018-02-05T23:27:36.636Z</common:last-modified-date>
<external-identifier:external-identifier put-code="134902" visibility="public" path="/0000-0001-7291-3210/external-identifiers/134902" display-index="1">
<common:created-date>2013-03-08T03:20:39.347Z</common:created-date>
<common:last-modified-date>2018-02-05T23:27:36.636Z</common:last-modified-date>
<common:source>
<common:source-client-id>
<common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
<common:path>0000-0002-5982-8983</common:path>
<common:host>orcid.org</common:host>
</common:source-client-id>
<common:source-name>Scopus - Elsevier</common:source-name>
<common:assertion-origin-orcid>
<common:uri>https://orcid.org/0000-0001-7291-3210</common:uri>
<common:path>0000-0001-7291-3210</common:path>
<common:host>orcid.org</common:host>
</common:assertion-origin-orcid>
<common:assertion-origin-name>Paolo Manghi</common:assertion-origin-name>
</common:source>
<common:external-id-type>Scopus Author ID</common:external-id-type>
<common:external-id-value>6602255248</common:external-id-value>
<common:external-id-url>http://www.scopus.com/inward/authorDetails.url?authorID=6602255248&amp;partnerID=MN8TOARS</common:external-id-url>
<common:external-id-relationship>self</common:external-id-relationship>
</external-identifier:external-identifier>
</external-identifier:external-identifiers>
</person:person>
<activities:activities-summary path="/0000-0001-5045-1000/activities">
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
<activities:distinctions path="/0000-0001-5045-1000/distinctions">
<common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
<activities:affiliation-group>
<common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
<common:external-ids/>
<distinction:distinction-summary put-code="19395146" display-index="1" path="/0000-0001-5045-1000/distinction/19395146" visibility="public">
<common:created-date>2023-01-19T13:49:48.482Z</common:created-date>
<common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Programa de Maestría</common:department-name>
<common:role-title>Becario del programa de Maestría en Economía Social y Solidaria</common:role-title>
<common:start-date>
<common:year>2014</common:year>
<common:month>10</common:month>
<common:day>20</common:day>
</common:start-date>
<common:organization>
<common:name>Instituto de Altos Estudios Nacionales</common:name>
<common:address>
<common:city>Quito</common:city>
<common:region>Pichincha</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>https://ror.org/011g3me54</common:disambiguated-organization-identifier>
<common:disambiguation-source>ROR</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</distinction:distinction-summary>
</activities:affiliation-group>
</activities:distinctions>
<activities:educations path="/0000-0001-5045-1000/educations">
<common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
<common:external-ids/>
<education:education-summary put-code="19389331" display-index="1" path="/0000-0001-5045-1000/education/19389331" visibility="public">
<common:created-date>2023-01-18T21:41:03.175Z</common:created-date>
<common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Programa de Doctorado en Sociología</common:department-name>
<common:role-title>Doctorando del Programa de Sociología</common:role-title>
<common:start-date>
<common:year>2020</common:year>
<common:month>11</common:month>
<common:day>06</common:day>
</common:start-date>
<common:organization>
<common:name>Universidad Nacional de Educación a Distancia Facultad de Ciencias Políticas y Sociología</common:name>
<common:address>
<common:city>Madrid</common:city>
<common:region>Comunidad de Madrid</common:region>
<common:country>ES</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>223339</common:disambiguated-organization-identifier>
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</education:education-summary>
</activities:affiliation-group>
</activities:educations>
<activities:employments path="/0000-0001-5045-1000/employments">
<common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:22:21.513Z</common:last-modified-date>
<common:external-ids/>
<employment:employment-summary put-code="19379757" display-index="1" path="/0000-0001-5045-1000/employment/19379757" visibility="public">
<common:created-date>2023-01-17T23:57:08.246Z</common:created-date>
<common:last-modified-date>2023-01-18T21:22:21.513Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Dirección de Vinculación con la Sociedad</common:department-name>
<common:role-title>Especialista de Proyectos y docente</common:role-title>
<common:start-date>
<common:year>2021</common:year>
<common:month>11</common:month>
<common:day>01</common:day>
</common:start-date>
<common:organization>
<common:name>Universidad de las Artes</common:name>
<common:address>
<common:city>Guayaquil</common:city>
<common:region>Guayas</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>https://ror.org/016drwn73</common:disambiguated-organization-identifier>
<common:disambiguation-source>ROR</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</employment:employment-summary>
</activities:affiliation-group>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
<common:external-ids/>
<employment:employment-summary put-code="19389234" display-index="1" path="/0000-0001-5045-1000/employment/19389234" visibility="public">
<common:created-date>2023-01-18T21:25:07.138Z</common:created-date>
<common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Dirección de Vinculación con la Sociedad</common:department-name>
<common:role-title>Director</common:role-title>
<common:start-date>
<common:year>2019</common:year>
<common:month>11</common:month>
<common:day>05</common:day>
</common:start-date>
<common:end-date>
<common:year>2021</common:year>
<common:month>10</common:month>
<common:day>31</common:day>
</common:end-date>
<common:organization>
<common:name>Universidad Regional Amazónica IKIAM</common:name>
<common:address>
<common:city>Tena</common:city>
<common:region>Napo</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>https://ror.org/05xedqd83</common:disambiguated-organization-identifier>
<common:disambiguation-source>ROR</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
<common:url>http://ikiam.edu.ec</common:url>
</employment:employment-summary>
</activities:affiliation-group>
</activities:employments>
<activities:fundings path="/0000-0001-5045-1000/fundings"/>
<activities:invited-positions path="/0000-0001-5045-1000/invited-positions"/>
<activities:memberships path="/0000-0001-5045-1000/memberships">
<common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
<activities:affiliation-group>
<common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
<common:external-ids/>
<membership:membership-summary put-code="19927715" display-index="1" path="/0000-0001-5045-1000/membership/19927715" visibility="public">
<common:created-date>2023-03-24T18:16:09.131Z</common:created-date>
<common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Artes Escénicas</common:department-name>
<common:role-title>Miembro</common:role-title>
<common:start-date>
<common:year>2000</common:year>
<common:month>07</common:month>
<common:day>15</common:day>
</common:start-date>
<common:organization>
<common:name>Casa de la Cultura Ecuatoriana</common:name>
<common:address>
<common:city>Riobamba</common:city>
<common:region>Sierra Centro</common:region>
<common:country>EC</common:country>
</common:address>
</common:organization>
</membership:membership-summary>
</activities:affiliation-group>
</activities:memberships>
<activities:peer-reviews path="/0000-0001-5045-1000/peer-reviews"/>
<activities:qualifications path="/0000-0001-5045-1000/qualifications">
<common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:29:11.300Z</common:last-modified-date>
<common:external-ids/>
<qualification:qualification-summary put-code="19389264" display-index="1" path="/0000-0001-5045-1000/qualification/19389264" visibility="public">
<common:created-date>2023-01-18T21:29:11.300Z</common:created-date>
<common:last-modified-date>2023-01-18T21:29:11.300Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Programa de Gobernabilidad</common:department-name>
<common:role-title>Magister en Economïa Social y Solidaria</common:role-title>
<common:start-date>
<common:year>2014</common:year>
<common:month>10</common:month>
<common:day>20</common:day>
</common:start-date>
<common:end-date>
<common:year>2017</common:year>
<common:month>01</common:month>
<common:day>26</common:day>
</common:end-date>
<common:organization>
<common:name>Instituto de Altos Estudios Nacionales</common:name>
<common:address>
<common:city>Quito</common:city>
<common:region>Pichincha</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>https://ror.org/011g3me54</common:disambiguated-organization-identifier>
<common:disambiguation-source>ROR</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</qualification:qualification-summary>
</activities:affiliation-group>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:34:32.093Z</common:last-modified-date>
<common:external-ids/>
<qualification:qualification-summary put-code="19389298" display-index="1" path="/0000-0001-5045-1000/qualification/19389298" visibility="public">
<common:created-date>2023-01-18T21:34:32.093Z</common:created-date>
<common:last-modified-date>2023-01-18T21:34:32.093Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Posgrados</common:department-name>
<common:role-title>Magister en Proyectos Sociales y Productivos</common:role-title>
<common:start-date>
<common:year>2001</common:year>
<common:month>03</common:month>
<common:day>09</common:day>
</common:start-date>
<common:end-date>
<common:year>2003</common:year>
<common:month>02</common:month>
<common:day>27</common:day>
</common:end-date>
<common:organization>
<common:name>Universidad Nacional de Chimborazo</common:name>
<common:address>
<common:city>Riobamba</common:city>
<common:region>Chimborazo</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>https://ror.org/059wmd288</common:disambiguated-organization-identifier>
<common:disambiguation-source>ROR</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</qualification:qualification-summary>
</activities:affiliation-group>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
<common:external-ids/>
<qualification:qualification-summary put-code="19389353" display-index="1" path="/0000-0001-5045-1000/qualification/19389353" visibility="public">
<common:created-date>2023-01-18T21:45:07.379Z</common:created-date>
<common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Ciencias de la Educación</common:department-name>
<common:role-title>Licenciado en Ciencias de la Educación en Castellano y Literatura</common:role-title>
<common:start-date>
<common:year>1994</common:year>
<common:month>10</common:month>
<common:day>03</common:day>
</common:start-date>
<common:end-date>
<common:year>2000</common:year>
<common:month>01</common:month>
<common:day>31</common:day>
</common:end-date>
<common:organization>
<common:name>Universidad Nacional de Chimborazo</common:name>
<common:address>
<common:city>Riobamba</common:city>
<common:region>Chimborazo</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>https://ror.org/059wmd288</common:disambiguated-organization-identifier>
<common:disambiguation-source>ROR</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</qualification:qualification-summary>
</activities:affiliation-group>
<activities:affiliation-group>
<common:last-modified-date>2023-01-18T21:37:42.186Z</common:last-modified-date>
<common:external-ids/>
<qualification:qualification-summary put-code="19389317" display-index="1" path="/0000-0001-5045-1000/qualification/19389317" visibility="public">
<common:created-date>2023-01-18T21:37:42.186Z</common:created-date>
<common:last-modified-date>2023-01-18T21:37:42.186Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<common:department-name>Facultad de Artes</common:department-name>
<common:role-title>Licenciado en Artes</common:role-title>
<common:start-date>
<common:year>1989</common:year>
<common:month>09</common:month>
<common:day>05</common:day>
</common:start-date>
<common:end-date>
<common:year>1997</common:year>
<common:month>08</common:month>
<common:day>07</common:day>
</common:end-date>
<common:organization>
<common:name>Universidad Central del Ecuador</common:name>
<common:address>
<common:city>Quito</common:city>
<common:region>Pichincha</common:region>
<common:country>EC</common:country>
</common:address>
<common:disambiguated-organization>
<common:disambiguated-organization-identifier>http://dx.doi.org/10.13039/100019134</common:disambiguated-organization-identifier>
<common:disambiguation-source>FUNDREF</common:disambiguation-source>
</common:disambiguated-organization>
</common:organization>
</qualification:qualification-summary>
</activities:affiliation-group>
</activities:qualifications>
<activities:research-resources path="/0000-0001-5045-1000/research-resources"/>
<activities:services path="/0000-0001-5045-1000/services"/>
<activities:works path="/0000-0001-5045-1000/works">
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
<activities:group>
<common:last-modified-date>2023-06-09T22:15:12.910Z</common:last-modified-date>
<common:external-ids/>
<work:work-summary put-code="131526645" path="/0000-0001-5045-1000/work/131526645" visibility="public" display-index="1">
<common:created-date>2023-03-24T18:36:56.180Z</common:created-date>
<common:last-modified-date>2023-06-09T22:15:12.910Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<work:title>
<common:title>Experience in a non-capitalist way: solidarity funds that do not tax interest on the use of money</common:title>
</work:title>
<common:external-ids>
<common:external-id>
<common:external-id-type>isbn</common:external-id-type>
<common:external-id-value>978-9942-29-089-2</common:external-id-value>
<common:external-id-normalized transient="true">9789942290892</common:external-id-normalized>
<common:external-id-relationship>part-of</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<work:type>book-chapter</work:type>
<common:publication-date>
<common:year>2023</common:year>
<common:month>06</common:month>
<common:day>07</common:day>
</common:publication-date>
<work:journal-title>Finanzas éticas y solidarias en América Latina: diagnósticos, debates y propuestas</work:journal-title>
</work:work-summary>
</activities:group>
<activities:group>
<common:last-modified-date>2023-03-24T19:05:36.384Z</common:last-modified-date>
<common:external-ids/>
<work:work-summary put-code="131527819" path="/0000-0001-5045-1000/work/131527819" visibility="public" display-index="1">
<common:created-date>2023-03-24T19:05:36.384Z</common:created-date>
<common:last-modified-date>2023-03-24T19:05:36.384Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<work:title>
<common:title>Incidence of artistic practices in the social transformation of the territory. study of case: Hilarte Association, Guayaquil-Ecuador</common:title>
</work:title>
<common:external-ids/>
<work:type>conference-abstract</work:type>
<common:publication-date>
<common:year>2022</common:year>
<common:month>10</common:month>
<common:day>06</common:day>
</common:publication-date>
</work:work-summary>
</activities:group>
<activities:group>
<common:last-modified-date>2023-09-04T17:40:30.215Z</common:last-modified-date>
<common:external-ids>
<common:external-id>
<common:external-id-type>other-id</common:external-id-type>
<common:external-id-value>2018</common:external-id-value>
<common:external-id-normalized transient="true">2018</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<work:work-summary put-code="141716337" path="/0000-0001-5045-1000/work/141716337" visibility="public" display-index="1">
<common:created-date>2023-09-04T17:40:30.215Z</common:created-date>
<common:last-modified-date>2023-09-04T17:40:30.215Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<work:title>
<common:title>Más allá de la transferencia de conocimientos, un espacio para el interaprendizaje y el diálogo de saberes</common:title>
</work:title>
<common:external-ids>
<common:external-id>
<common:external-id-type>other-id</common:external-id-type>
<common:external-id-value>2018</common:external-id-value>
<common:external-id-normalized transient="true">2018</common:external-id-normalized>
<common:external-id-relationship>self</common:external-id-relationship>
</common:external-id>
</common:external-ids>
<common:url>https://drive.google.com/drive/folders/1Tclz6isxGzSjTq-hfTnxe6M1nux-88wF?usp=drive_link</common:url>
<work:type>conference-poster</work:type>
<common:publication-date>
<common:year>2018</common:year>
<common:month>11</common:month>
<common:day>30</common:day>
</common:publication-date>
<work:journal-title>Más allá de la transferencia de conocimientos, un espacio para el interaprendizaje y el diálogo de saberes</work:journal-title>
</work:work-summary>
</activities:group>
<activities:group>
<common:last-modified-date>2023-03-24T18:57:10.095Z</common:last-modified-date>
<common:external-ids/>
<work:work-summary put-code="131527433" path="/0000-0001-5045-1000/work/131527433" visibility="public" display-index="1">
<common:created-date>2023-03-24T18:57:10.095Z</common:created-date>
<common:last-modified-date>2023-03-24T18:57:10.095Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<work:title>
<common:title>Promotion of the popular and solidarity economy from the state: principles and challenges in the experience of Ecuador</common:title>
</work:title>
<common:external-ids/>
<work:type>dissertation-thesis</work:type>
<common:publication-date>
<common:year>2017</common:year>
<common:month>01</common:month>
<common:day>26</common:day>
</common:publication-date>
</work:work-summary>
</activities:group>
<activities:group>
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
<common:external-ids/>
<work:work-summary put-code="141716713" path="/0000-0001-5045-1000/work/141716713" visibility="public" display-index="1">
<common:created-date>2023-09-04T17:51:57.749Z</common:created-date>
<common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
<common:source>
<common:source-orcid>
<common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
<common:path>0000-0001-5045-1000</common:path>
<common:host>orcid.org</common:host>
</common:source-orcid>
<common:source-name>Patricio Sánchez Quinchuela</common:source-name>
</common:source>
<work:title>
<common:title>La Rebelión de los Dioses</common:title>
</work:title>
<common:external-ids/>
<common:url>https://drive.google.com/drive/folders/1Tclz6isxGzSjTq-hfTnxe6M1nux-88wF?usp=drive_link</common:url>
<work:type>registered-copyright</work:type>
<common:publication-date>
<common:year>2001</common:year>
<common:month>08</common:month>
<common:day>28</common:day>
</common:publication-date>
<work:journal-title>Editorial pedagógica freire</work:journal-title>
</work:work-summary>
</activities:group>
</activities:works>
</activities:activities-summary>
</record:record>

View File

@ -16,6 +16,10 @@
<name>filterInvisible</name> <name>filterInvisible</name>
<description>whether filter out invisible entities after merge</description> <description>whether filter out invisible entities after merge</description>
</property> </property>
<property>
<name>isLookupUrl</name>
<description>the URL address of the lookUp service</description>
</property>
<property> <property>
<name>sparkDriverMemory</name> <name>sparkDriverMemory</name>
<description>heap memory for driver process</description> <description>heap memory for driver process</description>
@ -128,6 +132,7 @@
<arg>--graphInputPath</arg><arg>${graphBasePath}</arg> <arg>--graphInputPath</arg><arg>${graphBasePath}</arg>
<arg>--checkpointPath</arg><arg>${workingPath}/grouped_entities</arg> <arg>--checkpointPath</arg><arg>${workingPath}/grouped_entities</arg>
<arg>--outputPath</arg><arg>${graphOutputPath}</arg> <arg>--outputPath</arg><arg>${graphOutputPath}</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
<arg>--filterInvisible</arg><arg>${filterInvisible}</arg> <arg>--filterInvisible</arg><arg>${filterInvisible}</arg>
</spark> </spark>
<ok to="End"/> <ok to="End"/>

View File

@ -133,32 +133,6 @@
<arg>--targetPath</arg><arg>${inputPathMAG}/dataset</arg> <arg>--targetPath</arg><arg>${inputPathMAG}/dataset</arg>
<arg>--master</arg><arg>yarn-cluster</arg> <arg>--master</arg><arg>yarn-cluster</arg>
</spark> </spark>
<ok to="PreProcessORCID"/>
<error to="Kill"/>
</action>
<!-- ORCID SECTION -->
<action name="PreProcessORCID">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>Convert ORCID to Dataset</name>
<class>eu.dnetlib.doiboost.orcid.SparkPreprocessORCID</class>
<jar>dhp-doiboost-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.sql.shuffle.partitions=3840
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${inputPathOrcid}</arg>
<arg>--workingPath</arg><arg>${workingPathOrcid}</arg>
<arg>--master</arg><arg>yarn-cluster</arg>
</spark>
<ok to="End"/> <ok to="End"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>

View File

@ -59,10 +59,10 @@
</property> </property>
<!-- ORCID Parameters --> <!-- ORCID Parameters -->
<property> <!-- <property>-->
<name>workingPathOrcid</name> <!-- <name>workingPathOrcid</name>-->
<description>the ORCID working path</description> <!-- <description>the ORCID working path</description>-->
</property> <!-- </property>-->
</parameters> </parameters>
@ -84,7 +84,6 @@
<case to="End">${wf:conf('resumeFrom') eq 'Skip'}</case> <case to="End">${wf:conf('resumeFrom') eq 'Skip'}</case>
<case to="ProcessMAG">${wf:conf('resumeFrom') eq 'PreprocessMag'}</case> <case to="ProcessMAG">${wf:conf('resumeFrom') eq 'PreprocessMag'}</case>
<case to="ProcessUW">${wf:conf('resumeFrom') eq 'PreprocessUW'}</case> <case to="ProcessUW">${wf:conf('resumeFrom') eq 'PreprocessUW'}</case>
<case to="ProcessORCID">${wf:conf('resumeFrom') eq 'ProcessORCID'}</case>
<case to="CreateDOIBoost">${wf:conf('resumeFrom') eq 'CreateDOIBoost'}</case> <case to="CreateDOIBoost">${wf:conf('resumeFrom') eq 'CreateDOIBoost'}</case>
<case to="GenerateActionSet">${wf:conf('resumeFrom') eq 'GenerateActionSet'}</case> <case to="GenerateActionSet">${wf:conf('resumeFrom') eq 'GenerateActionSet'}</case>
<default to="ConvertCrossrefToOAF"/> <default to="ConvertCrossrefToOAF"/>
@ -170,32 +169,6 @@
<arg>--targetPath</arg><arg>${workingPath}/uwPublication</arg> <arg>--targetPath</arg><arg>${workingPath}/uwPublication</arg>
<arg>--master</arg><arg>yarn-cluster</arg> <arg>--master</arg><arg>yarn-cluster</arg>
</spark> </spark>
<ok to="ProcessORCID"/>
<error to="Kill"/>
</action>
<!-- ORCID SECTION -->
<action name="ProcessORCID">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>Convert ORCID to Dataset</name>
<class>eu.dnetlib.doiboost.orcid.SparkConvertORCIDToOAF</class>
<jar>dhp-doiboost-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.sql.shuffle.partitions=3840
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--workingPath</arg><arg>${workingPathOrcid}</arg>
<arg>--targetPath</arg><arg>${workingPath}/orcidPublication</arg>
<arg>--master</arg><arg>yarn-cluster</arg>
</spark>
<ok to="CreateDOIBoost"/> <ok to="CreateDOIBoost"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>

View File

@ -66,7 +66,7 @@ object SparkGenerateDoiBoost {
Encoders.tuple(Encoders.STRING, mapEncoderPub) Encoders.tuple(Encoders.STRING, mapEncoderPub)
implicit val mapEncoderRel: Encoder[Relation] = Encoders.kryo[Relation] implicit val mapEncoderRel: Encoder[Relation] = Encoders.kryo[Relation]
logger.info("Phase 2) Join Crossref with UnpayWall") logger.info("Phase 1) Join Crossref with UnpayWall")
val crossrefPublication: Dataset[(String, Publication)] = val crossrefPublication: Dataset[(String, Publication)] =
spark.read.load(s"$workingDirPath/crossrefPublication").as[Publication].map(p => (p.getId, p)) spark.read.load(s"$workingDirPath/crossrefPublication").as[Publication].map(p => (p.getId, p))
@ -91,20 +91,10 @@ object SparkGenerateDoiBoost {
.write .write
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.save(s"$workingDirPath/firstJoin") .save(s"$workingDirPath/firstJoin")
logger.info("Phase 3) Join Result with ORCID")
val fj: Dataset[(String, Publication)] =
spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p))
val orcidPublication: Dataset[(String, Publication)] =
spark.read.load(s"$workingDirPath/orcidPublication").as[Publication].map(p => (p.getId, p))
fj.joinWith(orcidPublication, fj("_1").equalTo(orcidPublication("_1")), "left")
.map(applyMerge)
.write
.mode(SaveMode.Overwrite)
.save(s"$workingDirPath/secondJoin")
logger.info("Phase 4) Join Result with MAG") logger.info("Phase 2) Join Result with MAG")
val sj: Dataset[(String, Publication)] = val sj: Dataset[(String, Publication)] =
spark.read.load(s"$workingDirPath/secondJoin").as[Publication].map(p => (p.getId, p)) spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p))
val magPublication: Dataset[(String, Publication)] = val magPublication: Dataset[(String, Publication)] =
spark.read.load(s"$workingDirPath/magPublication").as[Publication].map(p => (p.getId, p)) spark.read.load(s"$workingDirPath/magPublication").as[Publication].map(p => (p.getId, p))

View File

@ -107,7 +107,7 @@ case object Crossref2Oaf {
.map(f => f.id) .map(f => f.id)
} }
def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = { def mappingResult(result: Result, json: JValue, cobjCategory: String, originalType: String): Result = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
//MAPPING Crossref DOI into PID //MAPPING Crossref DOI into PID
@ -283,6 +283,11 @@ case object Crossref2Oaf {
ModelConstants.DNET_PUBLICATION_RESOURCE ModelConstants.DNET_PUBLICATION_RESOURCE
) )
) )
//ADD ORIGINAL TYPE to the mapping
val itm = new InstanceTypeMapping
itm.setOriginalType(originalType)
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
instance.setInstanceTypeMapping(List(itm).asJava)
result.setResourcetype( result.setResourcetype(
OafMapperUtils.qualifier( OafMapperUtils.qualifier(
cobjCategory.substring(0, 4), cobjCategory.substring(0, 4),
@ -367,7 +372,9 @@ case object Crossref2Oaf {
objectType, objectType,
mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type") mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type")
) )
mappingResult(result, json, cOBJCategory)
val originalType = if (mappingCrossrefSubType.contains(objectType)) objectType else objectSubType
mappingResult(result, json, cOBJCategory, originalType)
if (result == null || result.getId == null) if (result == null || result.getId == null)
return List() return List()

View File

@ -71,6 +71,9 @@ public class PropagationConstant {
public static final String PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_ID = "result:community:organization"; public static final String PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_ID = "result:community:organization";
public static final String PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME = " Propagation of result belonging to community through organization"; public static final String PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME = " Propagation of result belonging to community through organization";
public static final String PROPAGATION_RESULT_COMMUNITY_PROJECT_CLASS_ID = "result:community:project";
public static final String PROPAGATION_RESULT_COMMUNITY_PROJECT_CLASS_NAME = " Propagation of result belonging to community through project";
public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID = "authorpid:result"; public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID = "authorpid:result";
public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME = "Propagation of authors pid to result through semantic relations"; public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME = "Propagation of authors pid to result through semantic relations";

View File

@ -0,0 +1,80 @@
package eu.dnetlib.dhp.api;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import org.jetbrains.annotations.NotNull;
/**
 * Minimal HTTP client for the community API. Every public method appends an endpoint
 * path to the given base URL, issues a GET request and returns the response body as a string.
 *
 * @author miriam.baglioni
 * @Date 06/10/23
 */
public class QueryCommunityAPI {

    /** Charset name used to decode every response body. */
    private static final String RESPONSE_CHARSET = "utf-8";

    /**
     * Issues a GET request and returns the response body.
     *
     * @param geturl the complete URL to query
     * @return the response body, with each line trimmed and the lines concatenated
     * @throws IOException if the connection fails or the server answers with a status
     *                     other than 200; the message carries the status code and, when
     *                     available, the error body sent by the server
     */
    private static String get(String geturl) throws IOException {
        URL url = new URL(geturl);
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try {
            // No request body is sent, so doOutput is left at its default (false).
            conn.setRequestMethod("GET");

            // The status must be checked BEFORE reading the input stream: getInputStream()
            // itself throws on 4xx/5xx answers, which would hide the status code.
            int responseCode = conn.getResponseCode();
            if (responseCode != HttpURLConnection.HTTP_OK) {
                throw new IOException("Unexpected code " + responseCode + getErrorBody(conn));
            }
            return getBody(conn);
        } finally {
            // release the connection on every path, including failures
            conn.disconnect();
        }
    }

    /**
     * @param baseURL base URL of the community API
     * @return the body returned by {@code baseURL + "communities"}
     * @throws IOException if the request fails
     */
    public static String communities(String baseURL) throws IOException {
        return get(baseURL + "communities");
    }

    /**
     * @param id      community identifier
     * @param baseURL base URL of the community API
     * @return the body returned by {@code baseURL + id}
     * @throws IOException if the request fails
     */
    public static String community(String id, String baseURL) throws IOException {
        return get(baseURL + id);
    }

    /**
     * @param id      community identifier
     * @param baseURL base URL of the community API
     * @return the body returned by {@code baseURL + id + "/contentproviders"}
     * @throws IOException if the request fails
     */
    public static String communityDatasource(String id, String baseURL) throws IOException {
        return get(baseURL + id + "/contentproviders");
    }

    /**
     * @param id      community identifier
     * @param baseURL base URL of the community API
     * @return the body returned by {@code baseURL + id + "/propagationOrganizations"}
     * @throws IOException if the request fails
     */
    public static String communityPropagationOrganization(String id, String baseURL) throws IOException {
        return get(baseURL + id + "/propagationOrganizations");
    }

    /**
     * @param id      community identifier
     * @param page    page index, appended to the URL as is
     * @param size    page size, appended to the URL as is
     * @param baseURL base URL of the community API
     * @return the body returned by {@code baseURL + id + "/projects/" + page + "/" + size}
     * @throws IOException if the request fails
     */
    public static String communityProjects(String id, String page, String size, String baseURL) throws IOException {
        return get(baseURL + id + "/projects/" + page + "/" + size);
    }

    /**
     * Reads the whole (successful) response body of the connection.
     *
     * @param conn a connection whose response is available
     * @return the body, never {@code null}
     * @throws IOException if the body cannot be read
     */
    @NotNull
    private static String getBody(HttpURLConnection conn) throws IOException {
        try (BufferedReader br = new BufferedReader(
            new InputStreamReader(conn.getInputStream(), RESPONSE_CHARSET))) {
            return readLines(br);
        }
    }

    /**
     * Best-effort read of the error body; used only to enrich the exception message.
     *
     * @param conn a connection that answered with a non-200 status
     * @return the error body, or an empty string when there is none or it cannot be read
     */
    private static String getErrorBody(HttpURLConnection conn) {
        if (conn.getErrorStream() == null) {
            return "";
        }
        try (BufferedReader br = new BufferedReader(
            new InputStreamReader(conn.getErrorStream(), RESPONSE_CHARSET))) {
            return readLines(br);
        } catch (IOException ignored) {
            // the status code alone is still reported by the caller
            return "";
        }
    }

    /** Concatenates all the lines of the reader, trimming each one. */
    private static String readLines(BufferedReader br) throws IOException {
        StringBuilder response = new StringBuilder();
        String responseLine;
        while ((responseLine = br.readLine()) != null) {
            response.append(responseLine.trim());
        }
        return response.toString();
    }

}

View File

@ -0,0 +1,170 @@
package eu.dnetlib.dhp.api;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
import javax.management.Query;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.amazonaws.util.StringUtils;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.api.model.*;
import eu.dnetlib.dhp.bulktag.community.Community;
import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
import eu.dnetlib.dhp.bulktag.community.Provider;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
import eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob;
/**
 * Builds the community related configuration (tagging configuration, organization and
 * project associations) out of the answers of the community API queried through
 * {@link QueryCommunityAPI}.
 *
 * @author miriam.baglioni
 * @Date 09/10/23
 */
public class Utils implements Serializable {
    private static final ObjectMapper MAPPER = new ObjectMapper();
    private static final VerbResolver resolver = VerbResolverFactory.newInstance();

    private static final Logger log = LoggerFactory.getLogger(Utils.class);

    /**
     * Builds the tagging configuration: for every valid community (see
     * {@link #getValidCommunities(String)}) it fetches the community profile and its
     * content providers, and keeps the communities for which {@code Community.isValid()} holds.
     *
     * @param baseURL base URL of the community API
     * @return the configuration holding the retained communities, keyed by community id
     * @throws IOException if the list of communities cannot be fetched or parsed
     * @throws RuntimeException wrapping the {@link IOException} raised while fetching or
     *                          parsing the details of a single community
     */
    public static CommunityConfiguration getCommunityConfiguration(String baseURL) throws IOException {
        final Map<String, Community> communities = Maps.newHashMap();
        List<Community> validCommunities = new ArrayList<>();
        getValidCommunities(baseURL)
            .forEach(community -> {
                try {
                    CommunityModel cm = MAPPER
                        .readValue(QueryCommunityAPI.community(community.getId(), baseURL), CommunityModel.class);
                    validCommunities.add(getCommunity(cm));
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            });
        validCommunities.forEach(community -> {
            try {
                DatasourceList dl = MAPPER
                    .readValue(
                        QueryCommunityAPI.communityDatasource(community.getId(), baseURL), DatasourceList.class);
                community
                    .setProviders(
                        dl
                            .stream()
                            .filter(Utils::isEnabled)
                            .map(Utils::toProvider)
                            .collect(Collectors.toList()));
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        });
        validCommunities.forEach(community -> {
            if (community.isValid())
                communities.put(community.getId(), community);
        });
        return new CommunityConfiguration(communities);
    }

    /**
     * Tells whether a content provider has to be considered. The enabled flag is carried
     * as a String, hence it is compared as text: a missing flag or the value "false"
     * (any case) disable the provider.
     */
    private static boolean isEnabled(CommunityContentprovider d) {
        return d.getEnabled() != null && !"false".equalsIgnoreCase(d.getEnabled());
    }

    /** Maps a content provider to a Provider, prefixing its id with "10|" and binding the verb resolver. */
    private static Provider toProvider(CommunityContentprovider d) {
        Provider p = new Provider();
        p.setOpenaireId("10|" + d.getOpenaireId());
        p.setSelectionConstraints(d.getSelectioncriteria());
        if (p.getSelectionConstraints() != null)
            p.getSelectionConstraints().setSelection(resolver);
        return p;
    }

    /** Returns a fresh mutable list with the elements of the source, empty when the source is null. */
    private static List<String> mutableCopy(List<String> source) {
        List<String> copy = new ArrayList<>();
        if (source != null)
            copy.addAll(source);
        return copy;
    }

    /**
     * Maps the API profile of a community to its tagging configuration. Lists missing
     * from the profile are treated as empty, and the lists of the profile are copied
     * so that the profile itself is never modified.
     */
    private static Community getCommunity(CommunityModel cm) {
        Community c = new Community();
        c.setId(cm.getId());

        // other zenodo communities plus, when present, the main one
        List<String> zenodoCommunities = mutableCopy(cm.getOtherZenodoCommunities());
        if (!StringUtils.isNullOrEmpty(cm.getZenodoCommunity()))
            zenodoCommunities.add(cm.getZenodoCommunity());
        c.setZenodoCommunities(zenodoCommunities);

        // subjects, fos and sdg all end up in the subject list, in this order
        List<String> subjects = mutableCopy(cm.getSubjects());
        subjects.addAll(mutableCopy(cm.getFos()));
        subjects.addAll(mutableCopy(cm.getSdg()));
        c.setSubjects(subjects);

        if (cm.getAdvancedConstraints() != null) {
            c.setConstraints(cm.getAdvancedConstraints());
            c.getConstraints().setSelection(resolver);
        }
        if (cm.getRemoveConstraints() != null) {
            c.setRemoveConstraints(cm.getRemoveConstraints());
            c.getRemoveConstraints().setSelection(resolver);
        }
        return c;
    }

    /**
     * Returns the communities whose status is not "hidden" and whose type is "ri" or
     * "community". Entries without a type are discarded; entries without a status are
     * considered not hidden.
     *
     * @param baseURL base URL of the community API
     * @return the list of the selected communities
     * @throws IOException if the list of communities cannot be fetched or parsed
     */
    public static List<CommunityModel> getValidCommunities(String baseURL) throws IOException {
        return MAPPER
            .readValue(QueryCommunityAPI.communities(baseURL), CommunitySummary.class)
            .stream()
            .filter(
                community -> !"hidden".equals(community.getStatus()) &&
                    ("ri".equals(community.getType()) || "community".equals(community.getType())))
            .collect(Collectors.toList());
    }

    /**
     * it returns for each organization the list of associated communities.
     * Organization identifiers are prefixed with "20|".
     *
     * @param baseURL base URL of the community API
     * @return map from the prefixed organization id to the ids of the associated communities
     * @throws IOException if the list of communities cannot be fetched or parsed
     * @throws RuntimeException wrapping the {@link IOException} raised while fetching or
     *                          parsing the organizations of a single community
     */
    public static CommunityEntityMap getCommunityOrganization(String baseURL) throws IOException {
        CommunityEntityMap organizationMap = new CommunityEntityMap();
        getValidCommunities(baseURL)
            .forEach(community -> {
                String id = community.getId();
                try {
                    List<String> associatedOrgs = MAPPER
                        .readValue(
                            QueryCommunityAPI.communityPropagationOrganization(id, baseURL), OrganizationList.class);
                    associatedOrgs
                        .forEach(o -> organizationMap.computeIfAbsent("20|" + o, k -> new ArrayList<>()).add(id));
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            });
        return organizationMap;
    }

    /**
     * It returns for each project the list of associated communities. Projects are
     * fetched page by page (100 per page) and their identifiers are prefixed with "40|".
     * Paging goes on only while the API explicitly reports that the page is not the last one.
     *
     * @param baseURL base URL of the community API
     * @return map from the prefixed project id to the ids of the associated communities
     * @throws IOException if the list of communities cannot be fetched or parsed
     * @throws RuntimeException wrapping the {@link IOException} raised while fetching or
     *                          parsing a page of projects
     */
    public static CommunityEntityMap getCommunityProjects(String baseURL) throws IOException {
        CommunityEntityMap projectMap = new CommunityEntityMap();
        getValidCommunities(baseURL)
            .forEach(community -> {
                int page = -1;
                int size = 100;
                ContentModel cm;
                do {
                    page++;
                    try {
                        cm = MAPPER
                            .readValue(
                                QueryCommunityAPI
                                    .communityProjects(
                                        community.getId(), String.valueOf(page), String.valueOf(size), baseURL),
                                ContentModel.class);
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                    // a page without content contributes nothing
                    if (cm.getContent() != null) {
                        cm
                            .getContent()
                            .forEach(
                                p -> projectMap
                                    .computeIfAbsent("40|" + p.getOpenaireId(), k -> new ArrayList<>())
                                    .add(community.getId()));
                    }
                    // a missing "last" flag ends the paging instead of failing on unboxing
                } while (Boolean.FALSE.equals(cm.getLast()));
            });
        return projectMap;
    }
}

View File

@ -0,0 +1,43 @@
package eu.dnetlib.dhp.api.model;
import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.google.gson.Gson;
import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
/**
 * Bean describing one content provider entry: its OpenAIRE identifier, its selection
 * criteria and its enabled flag. JSON properties not declared here are ignored.
 */
@JsonAutoDetect
@JsonIgnoreProperties(ignoreUnknown = true)
public class CommunityContentprovider {

    private String openaireId;

    private SelectionConstraints selectioncriteria;

    private String enabled;

    /** @return the OpenAIRE identifier of the content provider */
    public String getOpenaireId() {
        return this.openaireId;
    }

    /** @param openaireId the OpenAIRE identifier of the content provider */
    public void setOpenaireId(final String openaireId) {
        this.openaireId = openaireId;
    }

    /** @return the selection criteria of the content provider */
    public SelectionConstraints getSelectioncriteria() {
        return this.selectioncriteria;
    }

    /** @param selectioncriteria the selection criteria of the content provider */
    public void setSelectioncriteria(SelectionConstraints selectioncriteria) {
        this.selectioncriteria = selectioncriteria;
    }

    /** @return the enabled flag, kept as text */
    public String getEnabled() {
        return this.enabled;
    }

    /** @param enabled the enabled flag, kept as text */
    public void setEnabled(String enabled) {
        this.enabled = enabled;
    }
}

View File

@ -1,13 +1,13 @@
package eu.dnetlib.dhp.resulttocommunityfromorganization; package eu.dnetlib.dhp.api.model;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
public class OrganizationMap extends HashMap<String, List<String>> { public class CommunityEntityMap extends HashMap<String, List<String>> {
public OrganizationMap() { public CommunityEntityMap() {
super(); super();
} }

View File

@ -0,0 +1,108 @@
package eu.dnetlib.dhp.api.model;
import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
/**
 * Serializable bean holding the profile of a community: identifier, type, status,
 * zenodo communities, subjects (plain, fos and sdg) and the selection constraints used
 * to add or remove results. JSON properties not declared here are ignored.
 *
 * @author miriam.baglioni
 * @Date 06/10/23
 */
@JsonIgnoreProperties(ignoreUnknown = true)
public class CommunityModel implements Serializable {

    private String id;
    private String type;
    private String status;

    private String zenodoCommunity;
    private List<String> subjects;
    private List<String> otherZenodoCommunities;
    private List<String> fos;
    private List<String> sdg;

    private SelectionConstraints advancedConstraints;
    private SelectionConstraints removeConstraints;

    /** @return the community identifier */
    public String getId() {
        return this.id;
    }

    /** @param id the community identifier */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the community type */
    public String getType() {
        return this.type;
    }

    /** @param type the community type */
    public void setType(String type) {
        this.type = type;
    }

    /** @return the community status */
    public String getStatus() {
        return this.status;
    }

    /** @param status the community status */
    public void setStatus(String status) {
        this.status = status;
    }

    /** @return the main zenodo community */
    public String getZenodoCommunity() {
        return this.zenodoCommunity;
    }

    /** @param zenodoCommunity the main zenodo community */
    public void setZenodoCommunity(String zenodoCommunity) {
        this.zenodoCommunity = zenodoCommunity;
    }

    /** @return the subjects of the community */
    public List<String> getSubjects() {
        return this.subjects;
    }

    /** @param subjects the subjects of the community */
    public void setSubjects(List<String> subjects) {
        this.subjects = subjects;
    }

    /** @return the additional zenodo communities */
    public List<String> getOtherZenodoCommunities() {
        return this.otherZenodoCommunities;
    }

    /** @param otherZenodoCommunities the additional zenodo communities */
    public void setOtherZenodoCommunities(List<String> otherZenodoCommunities) {
        this.otherZenodoCommunities = otherZenodoCommunities;
    }

    /** @return the fos values of the community */
    public List<String> getFos() {
        return this.fos;
    }

    /** @param fos the fos values of the community */
    public void setFos(List<String> fos) {
        this.fos = fos;
    }

    /** @return the sdg values of the community */
    public List<String> getSdg() {
        return this.sdg;
    }

    /** @param sdg the sdg values of the community */
    public void setSdg(List<String> sdg) {
        this.sdg = sdg;
    }

    /** @return the advanced constraints of the community */
    public SelectionConstraints getAdvancedConstraints() {
        return this.advancedConstraints;
    }

    /** @param advancedConstraints the advanced constraints of the community */
    public void setAdvancedConstraints(SelectionConstraints advancedConstraints) {
        this.advancedConstraints = advancedConstraints;
    }

    /** @return the remove constraints of the community */
    public SelectionConstraints getRemoveConstraints() {
        return this.removeConstraints;
    }

    /** @param removeConstraints the remove constraints of the community */
    public void setRemoveConstraints(SelectionConstraints removeConstraints) {
        this.removeConstraints = removeConstraints;
    }
}

View File

@ -0,0 +1,15 @@
package eu.dnetlib.dhp.api.model;
import java.io.Serializable;
import java.util.ArrayList;
/**
 * Concrete list of {@link CommunityModel} entries; having a non generic class lets it be
 * passed as a plain {@code Class} token where a target type is required.
 *
 * @author miriam.baglioni
 * @Date 06/10/23
 */
public class CommunitySummary extends ArrayList<CommunityModel> implements Serializable {

    /** Creates an empty summary. */
    public CommunitySummary() {
        // the no-arg ArrayList constructor is invoked implicitly
    }
}

View File

@ -0,0 +1,51 @@
package eu.dnetlib.dhp.api.model;
import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
/**
 * Serializable bean for one page of projects: the page content plus the paging
 * information (total pages, last-page flag, page number). JSON properties not declared
 * here are ignored.
 *
 * @author miriam.baglioni
 * @Date 09/10/23
 */
@JsonIgnoreProperties(ignoreUnknown = true)
public class ContentModel implements Serializable {

    private List<ProjectModel> content;

    private Integer totalPages;

    private Boolean last;

    private Integer number;

    /** @return the projects of the page */
    public List<ProjectModel> getContent() {
        return this.content;
    }

    /** @param content the projects of the page */
    public void setContent(List<ProjectModel> content) {
        this.content = content;
    }

    /** @return the total number of pages */
    public Integer getTotalPages() {
        return this.totalPages;
    }

    /** @param totalPages the total number of pages */
    public void setTotalPages(Integer totalPages) {
        this.totalPages = totalPages;
    }

    /** @return the last-page flag; {@code null} until it is set */
    public Boolean getLast() {
        return this.last;
    }

    /** @param last the last-page flag */
    public void setLast(Boolean last) {
        this.last = last;
    }

    /** @return the page number */
    public Integer getNumber() {
        return this.number;
    }

    /** @param number the page number */
    public void setNumber(Integer number) {
        this.number = number;
    }
}

View File

@ -0,0 +1,13 @@
package eu.dnetlib.dhp.api.model;
import java.io.Serializable;
import java.util.ArrayList;
import eu.dnetlib.dhp.api.model.CommunityContentprovider;
/**
 * Concrete list of {@link CommunityContentprovider} entries; having a non generic class
 * lets it be passed as a plain {@code Class} token where a target type is required.
 */
public class DatasourceList extends ArrayList<CommunityContentprovider> implements Serializable {

    /** Creates an empty list. */
    public DatasourceList() {
        // the no-arg ArrayList constructor is invoked implicitly
    }
}

View File

@ -0,0 +1,16 @@
package eu.dnetlib.dhp.api.model;
import java.io.Serializable;
import java.util.ArrayList;
/**
 * Concrete list of organization identifiers; having a non generic class lets it be
 * passed as a plain {@code Class} token where a target type is required.
 *
 * @author miriam.baglioni
 * @Date 09/10/23
 */
public class OrganizationList extends ArrayList<String> implements Serializable {

    /** Creates an empty list. */
    public OrganizationList() {
        // the no-arg ArrayList constructor is invoked implicitly
    }
}

View File

@ -0,0 +1,24 @@
package eu.dnetlib.dhp.api.model;
import java.io.Serializable;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
/**
 * Serializable bean for a project entry, of which only the OpenAIRE identifier is
 * retained. JSON properties not declared here are ignored.
 *
 * @author miriam.baglioni
 * @Date 09/10/23
 */
@JsonIgnoreProperties(ignoreUnknown = true)
public class ProjectModel implements Serializable {

    private String openaireId;

    /** @param openaireId the OpenAIRE identifier of the project */
    public void setOpenaireId(String openaireId) {
        this.openaireId = openaireId;
    }

    /** @return the OpenAIRE identifier of the project */
    public String getOpenaireId() {
        return this.openaireId;
    }
}

View File

@ -9,7 +9,6 @@ import java.util.*;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
@ -21,8 +20,11 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson; import com.google.gson.Gson;
import eu.dnetlib.dhp.api.Utils;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.bulktag.community.*; import eu.dnetlib.dhp.bulktag.community.*;
import eu.dnetlib.dhp.schema.common.EntityType;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
@ -54,50 +56,39 @@ public class SparkBulkTagJob {
.orElse(Boolean.TRUE); .orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged); log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
Boolean isTest = Optional
.ofNullable(parser.get("isTest"))
.map(Boolean::valueOf)
.orElse(Boolean.FALSE);
log.info("isTest: {} ", isTest);
final String inputPath = parser.get("sourcePath"); final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath); log.info("inputPath: {}", inputPath);
final String outputPath = parser.get("outputPath"); final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath); log.info("outputPath: {}", outputPath);
final String baseURL = parser.get("baseURL");
log.info("baseURL: {}", baseURL);
ProtoMap protoMappingParams = new Gson().fromJson(parser.get("pathMap"), ProtoMap.class); ProtoMap protoMappingParams = new Gson().fromJson(parser.get("pathMap"), ProtoMap.class);
log.info("pathMap: {}", new Gson().toJson(protoMappingParams)); log.info("pathMap: {}", new Gson().toJson(protoMappingParams));
final String resultClassName = parser.get("resultTableName");
log.info("resultTableName: {}", resultClassName);
final Boolean saveGraph = Optional
.ofNullable(parser.get("saveGraph"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("saveGraph: {}", saveGraph);
Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
SparkConf conf = new SparkConf(); SparkConf conf = new SparkConf();
CommunityConfiguration cc; CommunityConfiguration cc;
String taggingConf = parser.get("taggingConf"); String taggingConf = Optional
.ofNullable(parser.get("taggingConf"))
.map(String::valueOf)
.orElse(null);
if (isTest) { if (taggingConf != null) {
cc = CommunityConfigurationFactory.newInstance(taggingConf); cc = CommunityConfigurationFactory.newInstance(taggingConf);
} else { } else {
cc = QueryInformationSystem.getCommunityConfiguration(parser.get("isLookUpUrl")); cc = Utils.getCommunityConfiguration(baseURL);
log.info(OBJECT_MAPPER.writeValueAsString(cc));
} }
runWithSparkSession( runWithSparkSession(
conf, conf,
isSparkSessionManaged, isSparkSessionManaged,
spark -> { spark -> {
removeOutputDir(spark, outputPath);
extendCommunityConfigurationForEOSC(spark, inputPath, cc); extendCommunityConfigurationForEOSC(spark, inputPath, cc);
execBulkTag(spark, inputPath, outputPath, protoMappingParams, resultClazz, cc); execBulkTag(spark, inputPath, outputPath, protoMappingParams, cc);
}); });
} }
@ -106,10 +97,7 @@ public class SparkBulkTagJob {
Dataset<String> datasources = readPath( Dataset<String> datasources = readPath(
spark, inputPath spark, inputPath
.substring( + "datasource",
0,
inputPath.lastIndexOf("/"))
+ "/datasource",
Datasource.class) Datasource.class)
.filter((FilterFunction<Datasource>) ds -> isOKDatasource(ds)) .filter((FilterFunction<Datasource>) ds -> isOKDatasource(ds))
.map((MapFunction<Datasource, String>) ds -> ds.getId(), Encoders.STRING()); .map((MapFunction<Datasource, String>) ds -> ds.getId(), Encoders.STRING());
@ -117,10 +105,10 @@ public class SparkBulkTagJob {
Map<String, List<Pair<String, SelectionConstraints>>> dsm = cc.getEoscDatasourceMap(); Map<String, List<Pair<String, SelectionConstraints>>> dsm = cc.getEoscDatasourceMap();
for (String ds : datasources.collectAsList()) { for (String ds : datasources.collectAsList()) {
final String dsId = ds.substring(3); // final String dsId = ds.substring(3);
if (!dsm.containsKey(dsId)) { if (!dsm.containsKey(ds)) {
ArrayList<Pair<String, SelectionConstraints>> eoscList = new ArrayList<>(); ArrayList<Pair<String, SelectionConstraints>> eoscList = new ArrayList<>();
dsm.put(dsId, eoscList); dsm.put(ds, eoscList);
} }
} }
@ -142,11 +130,17 @@ public class SparkBulkTagJob {
String inputPath, String inputPath,
String outputPath, String outputPath,
ProtoMap protoMappingParams, ProtoMap protoMappingParams,
Class<R> resultClazz,
CommunityConfiguration communityConfiguration) { CommunityConfiguration communityConfiguration) {
ModelSupport.entityTypes
.keySet()
.parallelStream()
.filter(ModelSupport::isResult)
.forEach(e -> {
removeOutputDir(spark, outputPath + e.name());
ResultTagger resultTagger = new ResultTagger(); ResultTagger resultTagger = new ResultTagger();
readPath(spark, inputPath, resultClazz) Class<R> resultClazz = ModelSupport.entityTypes.get(e);
readPath(spark, inputPath + e.name(), resultClazz)
.map(patchResult(), Encoders.bean(resultClazz)) .map(patchResult(), Encoders.bean(resultClazz))
.filter(Objects::nonNull) .filter(Objects::nonNull)
.map( .map(
@ -157,7 +151,9 @@ public class SparkBulkTagJob {
.write() .write()
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.option("compression", "gzip") .option("compression", "gzip")
.json(outputPath); .json(outputPath + e.name());
});
} }
public static <R> Dataset<R> readPath( public static <R> Dataset<R> readPath(

View File

@ -4,6 +4,7 @@ package eu.dnetlib.dhp.bulktag.community;
import java.io.Serializable; import java.io.Serializable;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Optional;
import com.google.gson.Gson; import com.google.gson.Gson;
@ -13,7 +14,7 @@ public class Community implements Serializable {
private String id; private String id;
private List<String> subjects = new ArrayList<>(); private List<String> subjects = new ArrayList<>();
private List<Provider> providers = new ArrayList<>(); private List<Provider> providers = new ArrayList<>();
private List<ZenodoCommunity> zenodoCommunities = new ArrayList<>(); private List<String> zenodoCommunities = new ArrayList<>();
private SelectionConstraints constraints = new SelectionConstraints(); private SelectionConstraints constraints = new SelectionConstraints();
private SelectionConstraints removeConstraints = new SelectionConstraints(); private SelectionConstraints removeConstraints = new SelectionConstraints();
@ -26,7 +27,7 @@ public class Community implements Serializable {
return !getSubjects().isEmpty() return !getSubjects().isEmpty()
|| !getProviders().isEmpty() || !getProviders().isEmpty()
|| !getZenodoCommunities().isEmpty() || !getZenodoCommunities().isEmpty()
|| getConstraints().getCriteria() != null; || (Optional.ofNullable(getConstraints()).isPresent() && getConstraints().getCriteria() != null);
} }
public String getId() { public String getId() {
@ -53,11 +54,11 @@ public class Community implements Serializable {
this.providers = providers; this.providers = providers;
} }
public List<ZenodoCommunity> getZenodoCommunities() { public List<String> getZenodoCommunities() {
return zenodoCommunities; return zenodoCommunities;
} }
public void setZenodoCommunities(List<ZenodoCommunity> zenodoCommunities) { public void setZenodoCommunities(List<String> zenodoCommunities) {
this.zenodoCommunities = zenodoCommunities; this.zenodoCommunities = zenodoCommunities;
} }

View File

@ -81,7 +81,7 @@ public class CommunityConfiguration implements Serializable {
this.removeConstraintsMap = removeConstraintsMap; this.removeConstraintsMap = removeConstraintsMap;
} }
CommunityConfiguration(final Map<String, Community> communities) { public CommunityConfiguration(final Map<String, Community> communities) {
this.communities = communities; this.communities = communities;
init(); init();
} }
@ -117,10 +117,10 @@ public class CommunityConfiguration implements Serializable {
add(d.getOpenaireId(), new Pair<>(id, d.getSelectionConstraints()), datasourceMap); add(d.getOpenaireId(), new Pair<>(id, d.getSelectionConstraints()), datasourceMap);
} }
// get zenodo communities // get zenodo communities
for (ZenodoCommunity zc : c.getZenodoCommunities()) { for (String zc : c.getZenodoCommunities()) {
add( add(
zc.getZenodoCommunityId(), zc,
new Pair<>(id, zc.getSelCriteria()), new Pair<>(id, null),
zenodocommunityMap); zenodocommunityMap);
} }
selectionConstraintsMap.put(id, c.getConstraints()); selectionConstraintsMap.put(id, c.getConstraints());

View File

@ -143,16 +143,16 @@ public class CommunityConfigurationFactory {
return providerList; return providerList;
} }
private static List<ZenodoCommunity> parseZenodoCommunities(final Node node) { private static List<String> parseZenodoCommunities(final Node node) {
final List<Node> list = node.selectNodes("./zenodocommunities/zenodocommunity"); final List<Node> list = node.selectNodes("./zenodocommunities/zenodocommunity");
final List<ZenodoCommunity> zenodoCommunityList = new ArrayList<>(); final List<String> zenodoCommunityList = new ArrayList<>();
for (Node n : list) { for (Node n : list) {
ZenodoCommunity zc = new ZenodoCommunity(); // ZenodoCommunity zc = new ZenodoCommunity();
zc.setZenodoCommunityId(n.selectSingleNode("./zenodoid").getText()); // zc.setZenodoCommunityId(n.selectSingleNode("./zenodoid").getText());
zc.setSelCriteria(n.selectSingleNode("./selcriteria")); // zc.setSelCriteria(n.selectSingleNode("./selcriteria"));
zenodoCommunityList.add(zc); zenodoCommunityList.add(n.selectSingleNode("./zenodoid").getText());
} }
log.info("size of the zenodo community list " + zenodoCommunityList.size()); log.info("size of the zenodo community list " + zenodoCommunityList.size());

View File

@ -4,6 +4,8 @@ package eu.dnetlib.dhp.bulktag.community;
import java.io.Serializable; import java.io.Serializable;
import java.lang.reflect.InvocationTargetException; import java.lang.reflect.InvocationTargetException;
import org.apache.htrace.fasterxml.jackson.annotation.JsonIgnore;
import eu.dnetlib.dhp.bulktag.criteria.Selection; import eu.dnetlib.dhp.bulktag.criteria.Selection;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
@ -12,6 +14,7 @@ public class Constraint implements Serializable {
private String field; private String field;
private String value; private String value;
// private String element; // private String element;
@JsonIgnore
private Selection selection; private Selection selection;
public String getVerb() { public String getVerb() {
@ -38,10 +41,11 @@ public class Constraint implements Serializable {
this.value = value; this.value = value;
} }
public void setSelection(Selection sel) { //@JsonIgnore
selection = sel; // public void setSelection(Selection sel) {
} // selection = sel;
// }
@JsonIgnore
public void setSelection(VerbResolver resolver) public void setSelection(VerbResolver resolver)
throws InvocationTargetException, NoSuchMethodException, InstantiationException, throws InvocationTargetException, NoSuchMethodException, InstantiationException,
IllegalAccessException { IllegalAccessException {
@ -52,11 +56,4 @@ public class Constraint implements Serializable {
return selection.apply(metadata); return selection.apply(metadata);
} }
// public String getElement() {
// return element;
// }
//
// public void setElement(String element) {
// this.element = element;
// }
} }

View File

@ -1,34 +0,0 @@
package eu.dnetlib.dhp.bulktag.community;
import java.io.IOException;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.dom4j.DocumentException;
import org.xml.sax.SAXException;
import com.google.common.base.Joiner;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
/**
 * Retrieves the community tagging configuration from the Information System lookup service.
 */
public class QueryInformationSystem {

    /** Classpath location of the XQuery submitted to the lookup service. */
    private static final String QUERY_RESOURCE = "/eu/dnetlib/dhp/bulktag/query.xq";

    /**
     * Runs the bundled XQuery against the lookup service and parses the answer into a
     * {@link CommunityConfiguration}.
     *
     * @param isLookupUrl address of the lookup service, handed to {@code ISLookupClientFactory}
     * @return the configuration built from the query results wrapped in a {@code <communities>} element
     * @throws ISLookUpException if the lookup service call fails
     * @throws DocumentException declared by the configuration factory
     * @throws SAXException declared by the configuration factory
     * @throws IOException if the bundled XQuery is missing from the classpath or cannot be read
     */
    public static CommunityConfiguration getCommunityConfiguration(final String isLookupUrl)
        throws ISLookUpException, DocumentException, SAXException, IOException {
        final ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
        final List<String> res = isLookUp.quickSearchProfile(loadQuery());

        // The service returns one fragment per community; wrap them in a single root element.
        final String xmlConf = "<communities>" + Joiner.on(" ").join(res) + "</communities>";

        return CommunityConfigurationFactory.newInstance(xmlConf);
    }

    /** Reads the bundled XQuery as UTF-8 text, closing the classpath stream afterwards. */
    private static String loadQuery() throws IOException {
        try (java.io.InputStream in = QueryInformationSystem.class.getResourceAsStream(QUERY_RESOURCE)) {
            if (in == null) {
                // Fail with a descriptive, already-declared exception instead of a bare NullPointerException.
                throw new IOException("Classpath resource not found: " + QUERY_RESOURCE);
            }
            return IOUtils.toString(in, java.nio.charset.StandardCharsets.UTF_8);
        }
    }
}

View File

@ -82,11 +82,15 @@ public class ResultTagger implements Serializable {
// communities contains all the communities to be not added to the context // communities contains all the communities to be not added to the context
final Set<String> removeCommunities = new HashSet<>(); final Set<String> removeCommunities = new HashSet<>();
// if (conf.getRemoveConstraintsMap().keySet().size() > 0)
conf conf
.getRemoveConstraintsMap() .getRemoveConstraintsMap()
.keySet() .keySet()
.forEach(communityId -> { .forEach(
if (conf.getRemoveConstraintsMap().get(communityId).getCriteria() != null && communityId -> {
// log.info("Remove constraints for " + communityId);
if (conf.getRemoveConstraintsMap().keySet().contains(communityId) &&
conf.getRemoveConstraintsMap().get(communityId).getCriteria() != null &&
conf conf
.getRemoveConstraintsMap() .getRemoveConstraintsMap()
.get(communityId) .get(communityId)
@ -124,10 +128,10 @@ public class ResultTagger implements Serializable {
if (Objects.nonNull(result.getInstance())) { if (Objects.nonNull(result.getInstance())) {
for (Instance i : result.getInstance()) { for (Instance i : result.getInstance()) {
if (Objects.nonNull(i.getCollectedfrom()) && Objects.nonNull(i.getCollectedfrom().getKey())) { if (Objects.nonNull(i.getCollectedfrom()) && Objects.nonNull(i.getCollectedfrom().getKey())) {
collfrom.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|")); collfrom.add(i.getCollectedfrom().getKey());
} }
if (Objects.nonNull(i.getHostedby()) && Objects.nonNull(i.getHostedby().getKey())) { if (Objects.nonNull(i.getHostedby()) && Objects.nonNull(i.getHostedby().getKey())) {
hostdby.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|")); hostdby.add(i.getHostedby().getKey());
} }
} }

View File

@ -7,11 +7,13 @@ import java.util.Collection;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.google.gson.Gson; import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken; import com.google.gson.reflect.TypeToken;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
@JsonAutoDetect
public class SelectionConstraints implements Serializable { public class SelectionConstraints implements Serializable {
private List<Constraints> criteria; private List<Constraints> criteria;

View File

@ -9,9 +9,7 @@ import java.util.*;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
@ -20,6 +18,8 @@ import org.slf4j.LoggerFactory;
import com.google.gson.Gson; import com.google.gson.Gson;
import eu.dnetlib.dhp.api.Utils;
import eu.dnetlib.dhp.api.model.CommunityEntityMap;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Relation;
@ -48,10 +48,10 @@ public class PrepareResultCommunitySet {
final String outputPath = parser.get("outputPath"); final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath); log.info("outputPath: {}", outputPath);
final OrganizationMap organizationMap = new Gson() final String baseURL = parser.get("baseURL");
.fromJson( log.info("baseURL: {}", baseURL);
parser.get("organizationtoresultcommunitymap"),
OrganizationMap.class); final CommunityEntityMap organizationMap = Utils.getCommunityOrganization(baseURL);
log.info("organizationMap: {}", new Gson().toJson(organizationMap)); log.info("organizationMap: {}", new Gson().toJson(organizationMap));
SparkConf conf = new SparkConf(); SparkConf conf = new SparkConf();
@ -70,7 +70,7 @@ public class PrepareResultCommunitySet {
SparkSession spark, SparkSession spark,
String inputPath, String inputPath,
String outputPath, String outputPath,
OrganizationMap organizationMap) { CommunityEntityMap organizationMap) {
Dataset<Relation> relation = readPath(spark, inputPath, Relation.class); Dataset<Relation> relation = readPath(spark, inputPath, Relation.class);
relation.createOrReplaceTempView("relation"); relation.createOrReplaceTempView("relation");
@ -115,7 +115,7 @@ public class PrepareResultCommunitySet {
} }
private static MapFunction<ResultOrganizations, ResultCommunityList> mapResultCommunityFn( private static MapFunction<ResultOrganizations, ResultCommunityList> mapResultCommunityFn(
OrganizationMap organizationMap) { CommunityEntityMap organizationMap) {
return value -> { return value -> {
String rId = value.getResultId(); String rId = value.getResultId();
Optional<List<String>> orgs = Optional.ofNullable(value.getMerges()); Optional<List<String>> orgs = Optional.ofNullable(value.getMerges());

View File

@ -2,7 +2,7 @@
package eu.dnetlib.dhp.resulttocommunityfromorganization; package eu.dnetlib.dhp.resulttocommunityfromorganization;
import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.PropagationConstant.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
@ -22,6 +22,7 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Context; import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
import scala.Tuple2; import scala.Tuple2;
@ -53,29 +54,14 @@ public class SparkResultToCommunityFromOrganizationJob {
final String possibleupdatespath = parser.get("preparedInfoPath"); final String possibleupdatespath = parser.get("preparedInfoPath");
log.info("preparedInfoPath: {}", possibleupdatespath); log.info("preparedInfoPath: {}", possibleupdatespath);
final String resultClassName = parser.get("resultTableName");
log.info("resultTableName: {}", resultClassName);
final Boolean saveGraph = Optional
.ofNullable(parser.get("saveGraph"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("saveGraph: {}", saveGraph);
@SuppressWarnings("unchecked")
Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
SparkConf conf = new SparkConf(); SparkConf conf = new SparkConf();
conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
runWithSparkHiveSession( runWithSparkSession(
conf, conf,
isSparkSessionManaged, isSparkSessionManaged,
spark -> { spark -> {
removeOutputDir(spark, outputPath); execPropagation(spark, inputPath, outputPath, possibleupdatespath);
if (saveGraph) {
execPropagation(spark, inputPath, outputPath, resultClazz, possibleupdatespath);
}
}); });
} }
@ -83,11 +69,18 @@ public class SparkResultToCommunityFromOrganizationJob {
SparkSession spark, SparkSession spark,
String inputPath, String inputPath,
String outputPath, String outputPath,
Class<R> resultClazz,
String possibleUpdatesPath) { String possibleUpdatesPath) {
Dataset<ResultCommunityList> possibleUpdates = readPath(spark, possibleUpdatesPath, ResultCommunityList.class); Dataset<ResultCommunityList> possibleUpdates = readPath(spark, possibleUpdatesPath, ResultCommunityList.class);
Dataset<R> result = readPath(spark, inputPath, resultClazz);
ModelSupport.entityTypes
.keySet()
.parallelStream()
.forEach(e -> {
if (ModelSupport.isResult(e)) {
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
removeOutputDir(spark, outputPath + e.name());
Dataset<R> result = readPath(spark, inputPath + e.name(), resultClazz);
result result
.joinWith( .joinWith(
@ -98,7 +91,10 @@ public class SparkResultToCommunityFromOrganizationJob {
.write() .write()
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.option("compression", "gzip") .option("compression", "gzip")
.json(outputPath); .json(outputPath + e.name());
}
});
} }
private static <R extends Result> MapFunction<Tuple2<R, ResultCommunityList>, R> resultCommunityFn() { private static <R extends Result> MapFunction<Tuple2<R, ResultCommunityList>, R> resultCommunityFn() {

View File

@ -0,0 +1,123 @@
package eu.dnetlib.dhp.resulttocommunityfromproject;
import static eu.dnetlib.dhp.PropagationConstant.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.*;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.*;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.gson.Gson;
import eu.dnetlib.dhp.api.Utils;
import eu.dnetlib.dhp.api.model.CommunityEntityMap;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList;
import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultOrganizations;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Relation;
import scala.Tuple2;
/**
 * Preparation step that, for every result linked to at least one project, computes the list of
 * communities the result inherits from those projects and stores it as gzip-compressed JSON.
 */
public class PrepareResultCommunitySet {

    private static final Logger log = LoggerFactory.getLogger(PrepareResultCommunitySet.class);

    /**
     * Entry point: parses the arguments, fetches the project-to-communities map from the
     * community API and runs the preparation inside a Spark session.
     *
     * @param args command line arguments described by
     *             {@code input_preparecommunitytoresult_parameters.json}
     * @throws Exception if argument parsing, the API call or the Spark job fails
     */
    public static void main(String[] args) throws Exception {
        String jsonConfiguration = IOUtils
            .toString(
                PrepareResultCommunitySet.class
                    .getResourceAsStream(
                        "/eu/dnetlib/dhp/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json"));

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);

        parser.parseArgument(args);

        Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        String inputPath = parser.get("sourcePath");
        log.info("inputPath: {}", inputPath);

        final String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);

        final String baseURL = parser.get("baseURL");
        log.info("baseURL: {}", baseURL);

        final CommunityEntityMap projectsMap = Utils.getCommunityProjects(baseURL);

        SparkConf conf = new SparkConf();

        runWithSparkSession(
            conf,
            isSparkSessionManaged,
            spark -> {
                removeOutputDir(spark, outputPath);
                prepareInfo(spark, inputPath, outputPath, projectsMap);
            });
    }

    /**
     * Reads the relations, keeps the {@code isProducedBy} ones that are not deleted by inference,
     * groups them by result and writes one {@link ResultProjectList} per result having at least
     * one community.
     *
     * @param spark      the active Spark session
     * @param inputPath  path of the JSON relations
     * @param outputPath path where the prepared information is written (overwritten if present)
     * @param projectMap association between a project identifier and its communities
     */
    private static void prepareInfo(
        SparkSession spark,
        String inputPath,
        String outputPath,
        CommunityEntityMap projectMap) {

        // Only the fields needed below are declared, so the rest of each relation is not parsed.
        final StructType structureSchema = new StructType()
            .add(
                "dataInfo", new StructType()
                    .add("deletedbyinference", DataTypes.BooleanType)
                    .add("invisible", DataTypes.BooleanType))
            .add("source", DataTypes.StringType)
            .add("target", DataTypes.StringType)
            .add("relClass", DataTypes.StringType);

        spark
            .read()
            .schema(structureSchema)
            .json(inputPath)
            .filter(
                "dataInfo.deletedbyinference != true " +
                    "and relClass == '" + ModelConstants.IS_PRODUCED_BY + "'")
            .select(
                new Column("source").as("resultId"),
                new Column("target").as("projectId"))
            .groupByKey((MapFunction<Row, String>) r -> (String) r.getAs("resultId"), Encoders.STRING())
            .mapGroups((MapGroupsFunction<String, Row, ResultProjectList>) (k, v) -> {
                // Insertion-ordered set: de-duplicates in constant time and keeps the order in
                // which communities are first met.
                final Set<String> communities = new LinkedHashSet<>();
                v.forEachRemaining(r -> {
                    final String projectId = r.getAs("projectId");
                    // A project without associated communities contributes nothing.
                    Optional
                        .ofNullable(projectMap.get(projectId))
                        .ifPresent(communities::addAll);
                });
                if (communities.isEmpty()) {
                    // Results without communities are discarded by the filter that follows.
                    return null;
                }
                final ResultProjectList rpl = new ResultProjectList();
                rpl.setResultId(k);
                rpl.setCommunityList(new ArrayList<>(communities));
                return rpl;
            }, Encoders.bean(ResultProjectList.class))
            .filter(Objects::nonNull)
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .json(outputPath);
    }
}

View File

@ -0,0 +1,26 @@
package eu.dnetlib.dhp.resulttocommunityfromproject;
import java.io.Serializable;
import java.util.ArrayList;
/**
 * Serializable bean pairing a result identifier with a list of community identifiers.
 * Both properties are {@code null} until set through the corresponding setter.
 */
public class ResultProjectList implements Serializable {

    /** Identifier of the result. */
    private String resultId;

    /** Community identifiers attached to the result. */
    private ArrayList<String> communityList;

    /**
     * @return the result identifier, or {@code null} when it was never set
     */
    public String getResultId() {
        return this.resultId;
    }

    /**
     * @param resultId the result identifier to store
     */
    public void setResultId(final String resultId) {
        this.resultId = resultId;
    }

    /**
     * @return the stored list instance itself (no copy is made), or {@code null} when never set
     */
    public ArrayList<String> getCommunityList() {
        return this.communityList;
    }

    /**
     * @param communityList the list to store; the reference is kept as given
     */
    public void setCommunityList(final ArrayList<String> communityList) {
        this.communityList = communityList;
    }
}

View File

@ -0,0 +1,163 @@
package eu.dnetlib.dhp.resulttocommunityfromproject;
import static eu.dnetlib.dhp.PropagationConstant.*;
import static eu.dnetlib.dhp.PropagationConstant.PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList;
import eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.Result;
import scala.Tuple2;
/**
 * Spark job that enriches every result with the community contexts listed for it in the
 * prepared {@link ResultProjectList} information.
 *
 * @author miriam.baglioni
 * @Date 11/10/23
 */
public class SparkResultToCommunityFromProject implements Serializable {

    private static final Logger log = LoggerFactory.getLogger(SparkResultToCommunityFromProject.class);

    /**
     * Entry point: parses the arguments and runs the propagation inside a Spark session.
     *
     * @param args command line arguments described by {@code input_communitytoresult_parameters.json}
     * @throws Exception if argument parsing or the Spark job fails
     */
    public static void main(String[] args) throws Exception {
        String jsonConfiguration = IOUtils
            .toString(
                SparkResultToCommunityFromProject.class
                    .getResourceAsStream(
                        "/eu/dnetlib/dhp/resulttocommunityfromproject/input_communitytoresult_parameters.json"));

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);

        parser.parseArgument(args);

        Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        String inputPath = parser.get("sourcePath");
        log.info("inputPath: {}", inputPath);

        final String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);

        final String possibleupdatespath = parser.get("preparedInfoPath");
        log.info("preparedInfoPath: {}", possibleupdatespath);

        SparkConf conf = new SparkConf();

        runWithSparkSession(
            conf,
            isSparkSessionManaged,
            spark -> {
                execPropagation(spark, inputPath, outputPath, possibleupdatespath);
            });
    }

    /**
     * For every entity type that is a result, left-joins the results with the prepared community
     * lists and writes the enriched results as gzip-compressed JSON.
     *
     * <p>The entity type name is appended directly to {@code inputPath} and {@code outputPath},
     * so both must already end with a path separator.
     *
     * @param spark               the active Spark session
     * @param inputPath           base path of the input results
     * @param outputPath          base path of the output results; each sub-directory is removed first
     * @param possibleUpdatesPath path of the prepared {@link ResultProjectList} records
     */
    private static <R extends Result> void execPropagation(
        SparkSession spark,
        String inputPath,
        String outputPath,
        String possibleUpdatesPath) {

        Dataset<ResultProjectList> possibleUpdates = readPath(spark, possibleUpdatesPath, ResultProjectList.class);

        ModelSupport.entityTypes
            .keySet()
            .parallelStream()
            .forEach(e -> {
                if (ModelSupport.isResult(e)) {
                    removeOutputDir(spark, outputPath + e.name());
                    Class<R> resultClazz = ModelSupport.entityTypes.get(e);
                    Dataset<R> result = readPath(spark, inputPath + e.name(), resultClazz);

                    result
                        .joinWith(
                            possibleUpdates,
                            result.col("id").equalTo(possibleUpdates.col("resultId")),
                            "left_outer")
                        .map(resultCommunityFn(), Encoders.bean(resultClazz))
                        .write()
                        .mode(SaveMode.Overwrite)
                        .option("compression", "gzip")
                        .json(outputPath + e.name());
                }
            });
    }

    /**
     * Builds the function applied to each joined pair. When a community list is present, each
     * community either becomes a new context merged into the result, or, if the result already
     * carries that context, gets an additional provenance entry on the existing context.
     *
     * @return the mapping function; results without a matching community list are returned as they are
     */
    private static <R extends Result> MapFunction<Tuple2<R, ResultProjectList>, R> resultCommunityFn() {
        return value -> {
            R ret = value._1();
            Optional<ResultProjectList> rcl = Optional.ofNullable(value._2());
            if (rcl.isPresent()) {
                // A result may carry no context at all: treat it as an empty list instead of failing.
                final List<Context> currentContexts = ret.getContext() != null
                    ? ret.getContext()
                    : new ArrayList<Context>();
                final List<String> contextList = currentContexts
                    .stream()
                    .map(Context::getId)
                    .collect(Collectors.toList());

                @SuppressWarnings("unchecked")
                R res = (R) ret.getClass().newInstance();

                res.setId(ret.getId());
                List<Context> propagatedContexts = new ArrayList<>();
                for (String cId : rcl.get().getCommunityList()) {
                    if (!contextList.contains(cId)) {
                        Context newContext = new Context();
                        newContext.setId(cId);
                        newContext
                            .setDataInfo(
                                Arrays
                                    .asList(
                                        getDataInfo(
                                            PROPAGATION_DATA_INFO_TYPE,
                                            PROPAGATION_RESULT_COMMUNITY_PROJECT_CLASS_ID,
                                            PROPAGATION_RESULT_COMMUNITY_PROJECT_CLASS_NAME,
                                            ModelConstants.DNET_PROVENANCE_ACTIONS)));
                        propagatedContexts.add(newContext);
                    } else {
                        // Comparing from cId tolerates existing contexts whose id is null.
                        currentContexts
                            .stream()
                            .filter(c -> cId.equals(c.getId()))
                            .findFirst()
                            .ifPresent(
                                c -> c
                                    .getDataInfo()
                                    .add(
                                        getDataInfo(
                                            PROPAGATION_DATA_INFO_TYPE,
                                            PROPAGATION_RESULT_COMMUNITY_PROJECT_CLASS_ID,
                                            PROPAGATION_RESULT_COMMUNITY_PROJECT_CLASS_NAME,
                                            ModelConstants.DNET_PROVENANCE_ACTIONS)));
                    }
                }
                res.setContext(propagatedContexts);
                ret.mergeFrom(res);
            }
            return ret;
        };
    }
}

View File

@ -1,10 +1,4 @@
[ [
{
"paramName":"is",
"paramLongName":"isLookUpUrl",
"paramDescription": "URL of the isLookUp Service",
"paramRequired": true
},
{ {
"paramName":"s", "paramName":"s",
"paramLongName":"sourcePath", "paramLongName":"sourcePath",
@ -17,12 +11,6 @@
"paramDescription": "the json path associated to each selection field", "paramDescription": "the json path associated to each selection field",
"paramRequired": true "paramRequired": true
}, },
{
"paramName":"tn",
"paramLongName":"resultTableName",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{ {
"paramName": "out", "paramName": "out",
"paramLongName": "outputPath", "paramLongName": "outputPath",
@ -35,17 +23,16 @@
"paramDescription": "true if the spark session is managed, false otherwise", "paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false "paramRequired": false
}, },
{
"paramName": "test",
"paramLongName": "isTest",
"paramDescription": "Parameter intended for testing purposes only. True if the reun is relatesd to a test and so the taggingConf parameter should be loaded",
"paramRequired": false
},
{ {
"paramName": "tg", "paramName": "tg",
"paramLongName": "taggingConf", "paramLongName": "taggingConf",
"paramDescription": "this parameter is intended for testing purposes only. It is a possible tagging configuration obtained via the XQUERY. Intended to be removed", "paramDescription": "this parameter is intended for testing purposes only. It is a possible tagging configuration obtained via the XQUERY. Intended to be removed",
"paramRequired": false "paramRequired": false
},
{
"paramName": "bu",
"paramLongName": "baseURL",
"paramDescription": "this parameter is to specify the api to be queried (beta or production)",
"paramRequired": false
} }
] ]

View File

@ -4,10 +4,6 @@
<name>sourcePath</name> <name>sourcePath</name>
<description>the source path</description> <description>the source path</description>
</property> </property>
<property>
<name>isLookUpUrl</name>
<description>the isLookup service endpoint</description>
</property>
<property> <property>
<name>pathMap</name> <name>pathMap</name>
<description>the json path associated to each selection field</description> <description>the json path associated to each selection field</description>
@ -16,21 +12,10 @@
<name>outputPath</name> <name>outputPath</name>
<description>the output path</description> <description>the output path</description>
</property> </property>
<property> <property>
<name>postgresURL</name> <name>baseURL</name>
<description>the url of the postgress server to query</description> <description>the community API base URL</description>
</property> </property>
<property>
<name>postgresUser</name>
<description>the username to access the postgres db</description>
</property>
<property>
<name>postgresPassword</name>
<description>the postgres password</description>
</property>
</parameters> </parameters>
<global> <global>
@ -102,20 +87,13 @@
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<join name="copy_wait" to="fork_exec_bulktag"/> <join name="copy_wait" to="exec_bulktag"/>
<fork name="fork_exec_bulktag"> <action name="exec_bulktag">
<path start="bulktag_publication"/>
<path start="bulktag_dataset"/>
<path start="bulktag_otherresearchproduct"/>
<path start="bulktag_software"/>
</fork>
<action name="bulktag_publication">
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master> <master>yarn-cluster</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>bulkTagging-publication</name> <name>bulkTagging-result</name>
<class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class> <class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar> <jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
@ -128,100 +106,15 @@
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg> <arg>--sourcePath</arg><arg>${sourcePath}/</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg> <arg>--outputPath</arg><arg>${outputPath}/</arg>
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
<arg>--pathMap</arg><arg>${pathMap}</arg> <arg>--pathMap</arg><arg>${pathMap}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg> <arg>--baseURL</arg><arg>${baseURL}</arg>
</spark> </spark>
<ok to="wait"/> <ok to="End"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<action name="bulktag_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>bulkTagging-dataset</name>
<class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--num-executors=${sparkExecutorNumber}
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
<arg>--pathMap</arg><arg>${pathMap}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</spark>
<ok to="wait"/>
<error to="Kill"/>
</action>
<action name="bulktag_otherresearchproduct">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>bulkTagging-orp</name>
<class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--num-executors=${sparkExecutorNumber}
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
<arg>--pathMap</arg><arg>${pathMap}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</spark>
<ok to="wait"/>
<error to="Kill"/>
</action>
<action name="bulktag_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>bulkTagging-software</name>
<class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--num-executors=${sparkExecutorNumber}
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
<arg>--pathMap</arg><arg>${pathMap}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</spark>
<ok to="wait"/>
<error to="Kill"/>
</action>
<join name="wait" to="End"/>
<end name="End"/> <end name="End"/>
</workflow-app> </workflow-app>

View File

@ -1,62 +0,0 @@
(: Emits one <community> element for every profile in the ContextDSResourceType collection
   whose context has type 'community' or 'ri' and whose 'status' param is not 'hidden'.
   Each element carries:
     - the context id, copied as an attribute;
     - the text of the 'removeConstraints' and 'advancedConstraints' params;
     - one <subject> per comma-separated token of the 'subject', 'fos' and 'sdg' params;
     - one <datasource> (openaireId, selcriteria) per '::contentproviders' concept whose
       'enabled' param is 'true';
     - one <zenodocommunity> per 'zenodoCommunity' param value, plus one (zenodoid,
       selcriteria) per '::zenodocommunities' concept.
   NOTE(review): $organizations is bound below but never referenced in the returned element. :)
for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType')
let $subj := $x//CONFIGURATION/context/param[./@name='subject']/text()
let $datasources := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::contentproviders')]/concept
let $organizations := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::resultorganizations')]/concept
let $communities := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::zenodocommunities')]/concept
let $fos := $x//CONFIGURATION/context/param[./@name='fos']/text()
let $sdg := $x//CONFIGURATION/context/param[./@name='sdg']/text()
let $zenodo := $x//param[./@name='zenodoCommunity']/text()
where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] and $x//context/param[./@name = 'status']/text() != 'hidden'
return
<community>
{ $x//CONFIGURATION/context/@id}
<removeConstraints>
{$x//CONFIGURATION/context/param[./@name='removeConstraints']/text() }
</removeConstraints>
<advancedConstraints>
{$x//CONFIGURATION/context/param[./@name='advancedConstraints']/text() }
</advancedConstraints>
<subjects>
{for $y in tokenize($subj,',')
return
<subject>{$y}</subject>}
{for $y in tokenize($fos,',')
return
<subject>{$y}</subject>}
{for $y in tokenize($sdg,',')
return
<subject>{$y}</subject>}
</subjects>
<datasources>
{for $d in $datasources
where $d/param[./@name='enabled']/text()='true'
return
<datasource>
<openaireId>
{$d//param[./@name='openaireId']/text()}
</openaireId>
<selcriteria>
{$d/param[./@name='selcriteria']/text()}
</selcriteria>
</datasource> }
</datasources>
<zenodocommunities>
{for $zc in $zenodo
return
<zenodocommunity>
<zenodoid>
{$zc}
</zenodoid>
</zenodocommunity>}
{for $zc in $communities
return
<zenodocommunity>
<zenodoid>
{$zc/param[./@name='zenodoid']/text()}
</zenodoid>
<selcriteria>
{$zc/param[./@name='selcriteria']/text()}
</selcriteria>
</zenodocommunity>}
</zenodocommunities>
</community>

View File

@ -5,24 +5,7 @@
"paramDescription": "the path of the sequencial file to read", "paramDescription": "the path of the sequencial file to read",
"paramRequired": true "paramRequired": true
}, },
{
"paramName":"h",
"paramLongName":"hive_metastore_uris",
"paramDescription": "the hive metastore uris",
"paramRequired": true
},
{
"paramName":"sg",
"paramLongName":"saveGraph",
"paramDescription": "true if the new version of the graph must be saved",
"paramRequired": false
},
{
"paramName":"test",
"paramLongName":"isTest",
"paramDescription": "true if it is executing a test",
"paramRequired": false
},
{ {
"paramName": "out", "paramName": "out",
"paramLongName": "outputPath", "paramLongName": "outputPath",
@ -35,12 +18,6 @@
"paramDescription": "true if the spark session is managed, false otherwise", "paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false "paramRequired": false
}, },
{
"paramName":"tn",
"paramLongName":"resultTableName",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{ {
"paramName": "p", "paramName": "p",
"paramLongName": "preparedInfoPath", "paramLongName": "preparedInfoPath",

View File

@ -5,12 +5,6 @@
"paramDescription": "the path of the sequencial file to read", "paramDescription": "the path of the sequencial file to read",
"paramRequired": true "paramRequired": true
}, },
{
"paramName":"ocm",
"paramLongName":"organizationtoresultcommunitymap",
"paramDescription": "the map for the association organization communities",
"paramRequired": true
},
{ {
"paramName":"h", "paramName":"h",
"paramLongName":"hive_metastore_uris", "paramLongName":"hive_metastore_uris",
@ -28,6 +22,12 @@
"paramLongName": "outputPath", "paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files", "paramDescription": "the path used to store temporary output files",
"paramRequired": true "paramRequired": true
},
{
"paramName": "bu",
"paramLongName": "baseURL",
"paramDescription": "the base URL to the community API to use",
"paramRequired": false
} }
] ]

View File

@ -4,14 +4,14 @@
<name>sourcePath</name> <name>sourcePath</name>
<description>the source path</description> <description>the source path</description>
</property> </property>
<property>
<name>organizationtoresultcommunitymap</name>
<description>organization community map</description>
</property>
<property> <property>
<name>outputPath</name> <name>outputPath</name>
<description>the output path</description> <description>the output path</description>
</property> </property>
<property>
<name>baseURL</name>
<description>the community API base URL</description>
</property>
</parameters> </parameters>
<global> <global>
@ -93,149 +93,54 @@
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.PrepareResultCommunitySet</class> <class>eu.dnetlib.dhp.resulttocommunityfromorganization.PrepareResultCommunitySet</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar> <jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCores} --executor-cores=6
--executor-memory=${sparkExecutorMemory} --executor-memory=5G
--conf spark.executor.memoryOverhead=3g
--conf spark.sql.shuffle.partitions=3284
--driver-memory=${sparkDriverMemory} --driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners} --conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg> <arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg> <arg>--outputPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg> <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--organizationtoresultcommunitymap</arg><arg>${organizationtoresultcommunitymap}</arg> <arg>--baseURL</arg><arg>${baseURL}</arg>
</spark> </spark>
<ok to="fork-join-exec-propagation"/> <ok to="exec-propagation"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<fork name="fork-join-exec-propagation"> <action name="exec-propagation">
<path start="join_propagate_publication"/>
<path start="join_propagate_dataset"/>
<path start="join_propagate_otherresearchproduct"/>
<path start="join_propagate_software"/>
</fork>
<action name="join_propagate_publication">
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>community2resultfromorganization-Publication</name> <name>community2resultfromorganization</name>
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class> <class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar> <jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCores} --executor-cores=6
--executor-memory=${sparkExecutorMemory} --executor-memory=5G
--conf spark.executor.memoryOverhead=3g
--conf spark.sql.shuffle.partitions=3284
--driver-memory=${sparkDriverMemory} --driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners} --conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts> </spark-opts>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg> <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg> <arg>--sourcePath</arg><arg>${sourcePath}/</arg>
<arg>--outputPath</arg><arg>${outputPath}/publication</arg> <arg>--outputPath</arg><arg>${outputPath}/</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
</spark> </spark>
<ok to="wait2"/> <ok to="End"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<action name="join_propagate_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>community2resultfromorganization-Dataset</name>
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
</spark>
<ok to="wait2"/>
<error to="Kill"/>
</action>
<action name="join_propagate_otherresearchproduct">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>community2resultfromorganization-ORP</name>
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
</spark>
<ok to="wait2"/>
<error to="Kill"/>
</action>
<action name="join_propagate_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>community2resultfromorganization-Software</name>
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
</spark>
<ok to="wait2"/>
<error to="Kill"/>
</action>
<join name="wait2" to="End"/>
<end name="End"/> <end name="End"/>

View File

@ -0,0 +1,28 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequential file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName": "p",
"paramLongName": "preparedInfoPath",
"paramDescription": "the path where the prepared info has been stored",
"paramRequired": true
}
]

View File

@ -0,0 +1,28 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequential file to read",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "bu",
"paramLongName": "baseURL",
"paramDescription": "the base URL to the community API to use",
"paramRequired": false
}
]

View File

@ -0,0 +1,58 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
<property>
<name>hive_metastore_uris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
</property>
<property>
<name>spark2EventLogDir</name>
<value>/user/spark/spark2ApplicationHistory</value>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
</property>
<property>
<name>sparkExecutorNumber</name>
<value>4</value>
</property>
<property>
<name>sparkDriverMemory</name>
<value>15G</value>
</property>
<property>
<name>sparkExecutorMemory</name>
<value>6G</value>
</property>
<property>
<name>sparkExecutorCores</name>
<value>1</value>
</property>
<property>
<name>spark2MaxExecutors</name>
<value>50</value>
</property>
</configuration>

View File

@ -0,0 +1,147 @@
<workflow-app name="community_to_result_propagation_project" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
<description>the source path</description>
</property>
<property>
<name>outputPath</name>
<description>the output path</description>
</property>
<property>
<name>baseURL</name>
<description>the community API base URL</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="reset_outputpath"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="reset_outputpath">
<fs>
<delete path="${outputPath}"/>
<mkdir path="${outputPath}"/>
</fs>
<ok to="copy_entities"/>
<error to="Kill"/>
</action>
<fork name="copy_entities">
<path start="copy_relation"/>
<path start="copy_organization"/>
<path start="copy_projects"/>
<path start="copy_datasources"/>
</fork>
<action name="copy_relation">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<arg>${nameNode}/${sourcePath}/relation</arg>
<arg>${nameNode}/${outputPath}/relation</arg>
</distcp>
<ok to="copy_wait"/>
<error to="Kill"/>
</action>
<action name="copy_organization">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<arg>${nameNode}/${sourcePath}/organization</arg>
<arg>${nameNode}/${outputPath}/organization</arg>
</distcp>
<ok to="copy_wait"/>
<error to="Kill"/>
</action>
<action name="copy_projects">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<arg>${nameNode}/${sourcePath}/project</arg>
<arg>${nameNode}/${outputPath}/project</arg>
</distcp>
<ok to="copy_wait"/>
<error to="Kill"/>
</action>
<action name="copy_datasources">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<arg>${nameNode}/${sourcePath}/datasource</arg>
<arg>${nameNode}/${outputPath}/datasource</arg>
</distcp>
<ok to="copy_wait"/>
<error to="Kill"/>
</action>
<join name="copy_wait" to="prepare_result_communitylist"/>
<action name="prepare_result_communitylist">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Prepare-Community-Result-Project</name>
<class>eu.dnetlib.dhp.resulttocommunityfromproject.PrepareResultCommunitySet</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=6
--executor-memory=5G
--conf spark.executor.memoryOverhead=3g
--conf spark.sql.shuffle.partitions=3284
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
<arg>--baseURL</arg><arg>${baseURL}</arg>
</spark>
<ok to="exec-propagation"/>
<error to="Kill"/>
</action>
<action name="exec-propagation">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>community2resultfromproject</name>
<class>eu.dnetlib.dhp.resulttocommunityfromproject.SparkResultToCommunityFromProject</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=6
--executor-memory=5G
--conf spark.executor.memoryOverhead=3g
--conf spark.sql.shuffle.partitions=3284
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/</arg>
<arg>--outputPath</arg><arg>${outputPath}/</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -31,8 +31,6 @@ public class BulkTagJobTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static final String MOCK_IS_LOOK_UP_URL = "BASEURL:8280/is/services/isLookUp";
public static final String pathMap = "{ \"author\" : \"$['author'][*]['fullname']\"," public static final String pathMap = "{ \"author\" : \"$['author'][*]['fullname']\","
+ " \"title\" : \"$['title'][*]['value']\"," + " \"title\" : \"$['title'][*]['value']\","
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\"," + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
@ -42,7 +40,9 @@ public class BulkTagJobTest {
"\"fos\" : \"$['subject'][?(@['qualifier']['classid']=='FOS')].value\"," + "\"fos\" : \"$['subject'][?(@['qualifier']['classid']=='FOS')].value\"," +
"\"sdg\" : \"$['subject'][?(@['qualifier']['classid']=='SDG')].value\"," + "\"sdg\" : \"$['subject'][?(@['qualifier']['classid']=='SDG')].value\"," +
"\"hostedby\" : \"$['instance'][*]['hostedby']['key']\" , " + "\"hostedby\" : \"$['instance'][*]['hostedby']['key']\" , " +
"\"collectedfrom\" : \"$['instance'][*]['collectedfrom']['key']\"} "; "\"collectedfrom\" : \"$['instance'][*]['collectedfrom']['key']\"," +
"\"publisher\":\"$['publisher'].value\"," +
"\"publicationyear\":\"$['dateofacceptance'].value\"} ";
private static SparkSession spark; private static SparkSession spark;
@ -98,14 +98,11 @@ public class BulkTagJobTest {
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", "-sourcePath",
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates").getPath(), getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates/").getPath(),
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", "-outputPath", workingDir.toString() + "/",
"-outputPath", workingDir.toString() + "/dataset",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -133,19 +130,16 @@ public class BulkTagJobTest {
@Test @Test
void bulktagBySubjectNoPreviousContextTest() throws Exception { void bulktagBySubjectNoPreviousContextTest() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext") .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext/")
.getPath(); .getPath();
final String pathMap = BulkTagJobTest.pathMap; final String pathMap = BulkTagJobTest.pathMap;
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", "-outputPath", workingDir.toString() + "/",
"-outputPath", workingDir.toString() + "/dataset",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -230,19 +224,19 @@ public class BulkTagJobTest {
void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception { void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance") "/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance/")
.getPath(); .getPath();
final String pathMap = BulkTagJobTest.pathMap; final String pathMap = BulkTagJobTest.pathMap;
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -311,18 +305,18 @@ public class BulkTagJobTest {
@Test @Test
void bulktagByDatasourceTest() throws Exception { void bulktagByDatasourceTest() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource") .getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString() + "/publication", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -384,25 +378,25 @@ public class BulkTagJobTest {
void bulktagByZenodoCommunityTest() throws Exception { void bulktagByZenodoCommunityTest() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity") "/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
"-outputPath", workingDir.toString() + "/orp", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<OtherResearchProduct> tmp = sc JavaRDD<OtherResearchProduct> tmp = sc
.textFile(workingDir.toString() + "/orp") .textFile(workingDir.toString() + "/otherresearchproduct")
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class)); .map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class));
Assertions.assertEquals(10, tmp.count()); Assertions.assertEquals(10, tmp.count());
@ -505,18 +499,18 @@ public class BulkTagJobTest {
@Test @Test
void bulktagBySubjectDatasourceTest() throws Exception { void bulktagBySubjectDatasourceTest() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource") .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -539,6 +533,7 @@ public class BulkTagJobTest {
+ "where MyD.inferenceprovenance = 'bulktagging'"; + "where MyD.inferenceprovenance = 'bulktagging'";
org.apache.spark.sql.Dataset<Row> idExplodeCommunity = spark.sql(query); org.apache.spark.sql.Dataset<Row> idExplodeCommunity = spark.sql(query);
Assertions.assertEquals(7, idExplodeCommunity.count()); Assertions.assertEquals(7, idExplodeCommunity.count());
Assertions Assertions
@ -636,14 +631,14 @@ public class BulkTagJobTest {
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", "-sourcePath",
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/software/software_10.json.gz").getPath(), getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/software/").getPath(),
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-outputPath", workingDir.toString() + "/software", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -732,18 +727,18 @@ public class BulkTagJobTest {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints") "/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -774,19 +769,19 @@ public class BulkTagJobTest {
void bulkTagOtherJupyter() throws Exception { void bulkTagOtherJupyter() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/eosctag/jupyter/otherresearchproduct") "/eu/dnetlib/dhp/eosctag/jupyter/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
"-outputPath", workingDir.toString() + "/otherresearchproduct", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -829,18 +824,18 @@ public class BulkTagJobTest {
public void bulkTagDatasetJupyter() throws Exception { public void bulkTagDatasetJupyter() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/eosctag/jupyter/dataset") "/eu/dnetlib/dhp/eosctag/jupyter/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -878,18 +873,18 @@ public class BulkTagJobTest {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/eosctag/jupyter/software") "/eu/dnetlib/dhp/eosctag/jupyter/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-outputPath", workingDir.toString() + "/software", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -1096,18 +1091,18 @@ public class BulkTagJobTest {
void galaxyOtherTest() throws Exception { void galaxyOtherTest() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/eosctag/galaxy/otherresearchproduct") "/eu/dnetlib/dhp/eosctag/galaxy/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
"-outputPath", workingDir.toString() + "/otherresearchproduct", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -1214,18 +1209,18 @@ public class BulkTagJobTest {
void galaxySoftwareTest() throws Exception { void galaxySoftwareTest() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/eosctag/galaxy/software") "/eu/dnetlib/dhp/eosctag/galaxy/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-outputPath", workingDir.toString() + "/software", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -1333,19 +1328,19 @@ public class BulkTagJobTest {
void twitterDatasetTest() throws Exception { void twitterDatasetTest() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/eosctag/twitter/dataset") "/eu/dnetlib/dhp/eosctag/twitter/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -1373,19 +1368,19 @@ public class BulkTagJobTest {
void twitterOtherTest() throws Exception { void twitterOtherTest() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/eosctag/twitter/otherresearchproduct") "/eu/dnetlib/dhp/eosctag/twitter/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
"-outputPath", workingDir.toString() + "/otherresearchproduct", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -1418,19 +1413,19 @@ public class BulkTagJobTest {
void twitterSoftwareTest() throws Exception { void twitterSoftwareTest() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/eosctag/twitter/software") "/eu/dnetlib/dhp/eosctag/twitter/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-outputPath", workingDir.toString() + "/software", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -1455,19 +1450,19 @@ public class BulkTagJobTest {
void EoscContextTagTest() throws Exception { void EoscContextTagTest() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/bulktag/eosc/dataset/dataset_10.json") "/eu/dnetlib/dhp/bulktag/eosc/dataset/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -1533,16 +1528,16 @@ public class BulkTagJobTest {
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", "-sourcePath",
getClass() getClass()
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints") .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints/")
.getPath(), .getPath(),
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1568,4 +1563,41 @@ public class BulkTagJobTest {
} }
/**
 * Runs {@link SparkBulkTagJob} on the {@code no_updates} dataset sample and verifies that the 10 records are
 * written under the {@code dataset} output folder and that none of them exposes a context whose data info has
 * {@code inferenceprovenance = 'bulktagging'}.
 */
@Test
void newConfTest() throws Exception {
	final String sourcePath = getClass()
		.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates/")
		.getPath();
	final String outputRoot = workingDir.toString() + "/";

	// NOTE(review): "-baseURL" (https://services.openaire.eu/openaire/community/) is not passed here;
	// confirm the job does not need it when "-taggingConf" is supplied.
	final String[] jobArgs = {
		"-isSparkSessionManaged", Boolean.FALSE.toString(),
		"-sourcePath", sourcePath,
		"-outputPath", outputRoot,
		"-pathMap", BulkTagJobTest.pathMap,
		"-taggingConf", taggingConf
	};
	SparkBulkTagJob.main(jobArgs);

	// Read back the serialized datasets produced by the job.
	final JavaSparkContext sparkContext = JavaSparkContext.fromSparkContext(spark.sparkContext());
	final JavaRDD<Dataset> taggedDatasets = sparkContext
		.textFile(outputRoot + "dataset")
		.map(json -> OBJECT_MAPPER.readValue(json, Dataset.class));

	Assertions.assertEquals(10, taggedDatasets.count());

	// Expose the records to Spark SQL so the context/datainfo arrays can be exploded.
	spark
		.createDataset(taggedDatasets.rdd(), Encoders.bean(Dataset.class))
		.createOrReplaceTempView("dataset");

	final String bulkTaggedContexts = "select id, MyT.id community "
		+ "from dataset "
		+ "lateral view explode(context) c as MyT "
		+ "lateral view explode(MyT.datainfo) d as MyD "
		+ "where MyD.inferenceprovenance = 'bulktagging'";

	Assertions.assertEquals(0, spark.sql(bulkTaggedContexts).count());
}
} }

View File

@ -47,7 +47,7 @@ class CommunityConfigurationFactoryTest {
sc.setVerb("not_contains"); sc.setVerb("not_contains");
sc.setField("contributor"); sc.setField("contributor");
sc.setValue("DARIAH"); sc.setValue("DARIAH");
sc.setSelection(resolver.getSelectionCriteria(sc.getVerb(), sc.getValue())); sc.setSelection(resolver);// .getSelectionCriteria(sc.getVerb(), sc.getValue()));
String metadata = "This work has been partially supported by DARIAH-EU infrastructure"; String metadata = "This work has been partially supported by DARIAH-EU infrastructure";
Assertions.assertFalse(sc.verifyCriteria(metadata)); Assertions.assertFalse(sc.verifyCriteria(metadata));
} }

View File

@ -72,15 +72,13 @@ public class ResultToCommunityJobTest {
SparkResultToCommunityFromOrganizationJob SparkResultToCommunityFromOrganizationJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", getClass() "-sourcePath", getClass()
.getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/sample") .getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/sample/")
.getPath(), .getPath(),
"-hive_metastore_uris", "",
"-saveGraph", "true", "-outputPath", workingDir.toString() + "/",
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset",
"-preparedInfoPath", preparedInfoPath "-preparedInfoPath", preparedInfoPath
}); });

View File

@ -0,0 +1,133 @@
package eu.dnetlib.dhp.resulttocommunityfromproject;
import static org.apache.spark.sql.functions.desc;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.orcidtoresultfromsemrel.OrcidPropagationJobTest;
import eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob;
import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.Dataset;
/**
 * Integration test for {@link SparkResultToCommunityFromProject}.
 *
 * <p>The job is run against the {@code resulttocommunityfromproject/sample} fixtures with the
 * {@code preparedInfo} association file, and the datasets written under the working directory are read back
 * and inspected.
 */
public class ResultToCommunityJobTest {

	private static final Logger log = LoggerFactory.getLogger(ResultToCommunityJobTest.class);

	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	/** Provenance action class id looked for on the contexts of the propagated results. */
	private static final String PROVENANCE_CLASS_ID = "result:community:project";

	private static SparkSession spark;

	private static Path workingDir;

	/**
	 * Creates the temporary working directory and a local Spark session shared by the tests of this class.
	 *
	 * @throws IOException if the temporary directory cannot be created
	 */
	@BeforeAll
	public static void beforeAll() throws IOException {
		workingDir = Files.createTempDirectory(ResultToCommunityJobTest.class.getSimpleName());
		log.info("using work dir {}", workingDir);

		SparkConf conf = new SparkConf();
		conf.setAppName(ResultToCommunityJobTest.class.getSimpleName());

		conf.setMaster("local[*]");
		conf.set("spark.driver.host", "localhost");
		conf.set("hive.metastore.local", "true");
		conf.set("spark.ui.enabled", "false");
		conf.set("spark.sql.warehouse.dir", workingDir.toString());
		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

		spark = SparkSession
			.builder()
			// name the session after this test class, consistently with the SparkConf above
			.appName(ResultToCommunityJobTest.class.getSimpleName())
			.config(conf)
			.getOrCreate();
	}

	/**
	 * Stops the Spark session and removes the working directory.
	 *
	 * <p>The directory removal sits in a {@code finally} block so that it still runs if stopping Spark throws,
	 * and the session is stopped first so that a failing delete cannot leave it running.
	 *
	 * @throws IOException if the working directory cannot be deleted
	 */
	@AfterAll
	public static void afterAll() throws IOException {
		try {
			if (spark != null) {
				spark.stop();
			}
		} finally {
			if (workingDir != null) {
				FileUtils.deleteDirectory(workingDir.toFile());
			}
		}
	}

	/**
	 * Runs the propagation job and checks the produced datasets: 10 records are written, the two results
	 * associated with {@code aurora} carry a context with the {@code result:community:project} provenance,
	 * the two other result ids listed in the prepared info are not present in the output, and exactly two
	 * records have an {@code aurora} context.
	 */
	@Test
	void testSparkResultToCommunityFromProjectJob() throws Exception {
		final String preparedInfoPath = getClass()
			.getResource("/eu/dnetlib/dhp/resulttocommunityfromproject/preparedInfo")
			.getPath();
		final String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/resulttocommunityfromproject/sample/")
			.getPath();

		SparkResultToCommunityFromProject
			.main(
				new String[] {
					"-isSparkSessionManaged", Boolean.FALSE.toString(),
					"-sourcePath", sourcePath,
					"-outputPath", workingDir.toString() + "/",
					"-preparedInfoPath", preparedInfoPath
				});

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<Dataset> tmp = sc
			.textFile(workingDir.toString() + "/dataset")
			.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));

		Assertions.assertEquals(10, tmp.count());

		/*
		 * Content of the prepared info used by this test:
		 * {"resultId":"50|57a035e5b1ae::d5be548ca7ae489d762f893be67af52f","communityList":["aurora"]}
		 * {"resultId":"50|57a035e5b1ae::a77232ffca9115fcad51c3503dbc7e3e","communityList":["aurora"]}
		 * {"resultId":"50|57a035e5b1ae::803aaad4decab7e27cd4b52a1931b3a1","communityList":["sdsn-gr"]}
		 * {"resultId":"50|57a035e5b1ae::a02e9e4087bca50687731ae5c765b5e1","communityList":["netherlands"]}
		 */
		assertTaggedFromProject(tmp, "50|57a035e5b1ae::d5be548ca7ae489d762f893be67af52f");
		assertTaggedFromProject(tmp, "50|57a035e5b1ae::a77232ffca9115fcad51c3503dbc7e3e");

		Assertions.assertEquals(0, countById(tmp, "50|57a035e5b1ae::803aaad4decab7e27cd4b52a1931b3a1"));
		Assertions.assertEquals(0, countById(tmp, "50|57a035e5b1ae::a02e9e4087bca50687731ae5c765b5e1"));

		Assertions
			.assertEquals(
				2,
				tmp
					.filter(
						r -> r.getContext() != null
							&& r.getContext().stream().anyMatch(c -> c != null && "aurora".equals(c.getId())))
					.count());
	}

	/**
	 * Asserts that the result with the given id has at least one context whose data info carries the
	 * {@code result:community:project} provenance action.
	 *
	 * @param results the datasets produced by the job
	 * @param resultId identifier of the result to inspect
	 */
	private static void assertTaggedFromProject(final JavaRDD<Dataset> results, final String resultId) {
		final List<Context> context = results
			.filter(r -> resultId.equals(r.getId()))
			.first()
			.getContext();
		// fail with an assertion error rather than a NullPointerException when no context was set
		Assertions.assertNotNull(context);
		Assertions.assertTrue(context.stream().anyMatch(ResultToCommunityJobTest::containsResultCommunityProject));
	}

	/**
	 * Counts the produced datasets having the given identifier.
	 *
	 * @param results the datasets produced by the job
	 * @param resultId identifier to look for
	 * @return the number of records whose id equals {@code resultId}
	 */
	private static long countById(final JavaRDD<Dataset> results, final String resultId) {
		return results.filter(r -> resultId.equals(r.getId())).count();
	}

	/**
	 * Tells whether the context has a data info entry whose provenance action class id is
	 * {@code result:community:project}. Missing (null) contexts, data info lists, entries or provenance actions
	 * are treated as "no match".
	 *
	 * @param c the context to inspect
	 * @return {@code true} if a matching data info entry exists
	 */
	private static boolean containsResultCommunityProject(Context c) {
		if (c == null || c.getDataInfo() == null) {
			return false;
		}
		return c
			.getDataInfo()
			.stream()
			.anyMatch(
				di -> di != null
					&& di.getProvenanceaction() != null
					&& PROVENANCE_CLASS_ID.equals(di.getProvenanceaction().getClassid()));
	}
}

View File

@ -26,7 +26,7 @@
<subjects/> <subjects/>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>re3data_____::a507cdacc5bbcc08761c92185dee5cab</openaireId> <openaireId>10|re3data_____::a507cdacc5bbcc08761c92185dee5cab</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
</datasources> </datasources>
@ -140,39 +140,39 @@
</subjects> </subjects>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>re3data_____::9ebe127e5f3a0bf401875690f3bb6b81</openaireId> <openaireId>10|re3data_____::9ebe127e5f3a0bf401875690f3bb6b81</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::c6cd4b532e12868c1d760a8d7cda6815</openaireId> <openaireId>10|doajarticles::c6cd4b532e12868c1d760a8d7cda6815</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b</openaireId> <openaireId>10|doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::6eb31d13b12bc06bbac06aef63cf33c9</openaireId> <openaireId>10|doajarticles::6eb31d13b12bc06bbac06aef63cf33c9</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::0da84e9dfdc8419576169e027baa8028</openaireId> <openaireId>10|doajarticles::0da84e9dfdc8419576169e027baa8028</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>re3data_____::84e123776089ce3c7a33db98d9cd15a8</openaireId> <openaireId>10|re3data_____::84e123776089ce3c7a33db98d9cd15a8</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>openaire____::c5502a43e76feab55dd00cf50f519125</openaireId> <openaireId>10|openaire____::c5502a43e76feab55dd00cf50f519125</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>re3data_____::a48f09c562b247a9919acfe195549b47</openaireId> <openaireId>10|re3data_____::a48f09c562b247a9919acfe195549b47</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::97275a23ca44226c9964043c8462be96</openaireId> <openaireId>10|opendoar____::97275a23ca44226c9964043c8462be96</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
</datasources> </datasources>
@ -287,55 +287,55 @@
</subjects> </subjects>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>doajarticles::8cec81178926caaca531afbd8eb5d64c</openaireId> <openaireId>10|doajarticles::8cec81178926caaca531afbd8eb5d64c</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::0f7a7f30b5400615cae1829f3e743982</openaireId> <openaireId>10|doajarticles::0f7a7f30b5400615cae1829f3e743982</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::9740f7f5af3e506d2ad2c215cdccd51a</openaireId> <openaireId>10|doajarticles::9740f7f5af3e506d2ad2c215cdccd51a</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::9f3fbaae044fa33cb7069b72935a3254</openaireId> <openaireId>10|doajarticles::9f3fbaae044fa33cb7069b72935a3254</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::cb67f33eb9819f5c624ce0313957f6b3</openaireId> <openaireId>10|doajarticles::cb67f33eb9819f5c624ce0313957f6b3</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::e21c97cbb7a209afc75703681c462906</openaireId> <openaireId>10|doajarticles::e21c97cbb7a209afc75703681c462906</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::554cde3be9e5c4588b4c4f9f503120cb</openaireId> <openaireId>10|doajarticles::554cde3be9e5c4588b4c4f9f503120cb</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>tubitakulakb::11e22f49e65b9fd11d5b144b93861a1b</openaireId> <openaireId>10|tubitakulakb::11e22f49e65b9fd11d5b144b93861a1b</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::57c5d3837da943e93b28ec4db82ec7a5</openaireId> <openaireId>10|doajarticles::57c5d3837da943e93b28ec4db82ec7a5</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::a186f5ddb8e8c7ecc992ef51cf3315b1</openaireId> <openaireId>10|doajarticles::a186f5ddb8e8c7ecc992ef51cf3315b1</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::e21c97cbb7a209afc75703681c462906</openaireId> <openaireId>10|doajarticles::e21c97cbb7a209afc75703681c462906</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::dca64612dfe0963fffc119098a319957</openaireId> <openaireId>10|doajarticles::dca64612dfe0963fffc119098a319957</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::dd70e44479f0ade25aa106aef3e87a0a</openaireId> <openaireId>10|doajarticles::dd70e44479f0ade25aa106aef3e87a0a</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
</datasources> </datasources>
@ -406,27 +406,27 @@
</subjects> </subjects>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>re3data_____::5b9bf9171d92df854cf3c520692e9122</openaireId> <openaireId>10|re3data_____::5b9bf9171d92df854cf3c520692e9122</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::c7d3de67dc77af72f6747157441252ec</openaireId> <openaireId>10|doajarticles::c7d3de67dc77af72f6747157441252ec</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>re3data_____::8515794670370f49c1d176c399c714f5</openaireId> <openaireId>10|re3data_____::8515794670370f49c1d176c399c714f5</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::d640648c84b10d425f96f11c3de468f3</openaireId> <openaireId>10|doajarticles::d640648c84b10d425f96f11c3de468f3</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a</openaireId> <openaireId>10|doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>rest________::fb1a3d4523c95e63496e3bc7ba36244b</openaireId> <openaireId>10|rest________::fb1a3d4523c95e63496e3bc7ba36244b</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
</datasources> </datasources>
@ -743,27 +743,27 @@
</subjects> </subjects>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>opendoar____::1a551829d50f1400b0dab21fdd969c04</openaireId> <openaireId>10|opendoar____::1a551829d50f1400b0dab21fdd969c04</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::49af6c4e558a7569d80eee2e035e2bd7</openaireId> <openaireId>10|opendoar____::49af6c4e558a7569d80eee2e035e2bd7</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::0266e33d3f546cb5436a10798e657d97</openaireId> <openaireId>10|opendoar____::0266e33d3f546cb5436a10798e657d97</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::fd4c2dc64ccb8496e6f1f94c85f30d06</openaireId> <openaireId>10|opendoar____::fd4c2dc64ccb8496e6f1f94c85f30d06</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::41bfd20a38bb1b0bec75acf0845530a7</openaireId> <openaireId>10|opendoar____::41bfd20a38bb1b0bec75acf0845530a7</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::87ae6fb631f7c8a627e8e28785d9992d</openaireId> <openaireId>10|opendoar____::87ae6fb631f7c8a627e8e28785d9992d</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
</datasources> </datasources>
@ -983,11 +983,11 @@
<subjects/> <subjects/>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>opendoar____::7e7757b1e12abcb736ab9a754ffb617a</openaireId> <openaireId>10|opendoar____::7e7757b1e12abcb736ab9a754ffb617a</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]}</selcriteria> <selcriteria>{"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]}</selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::96da2f590cd7246bbde0051047b0d6f7</openaireId> <openaireId>10|opendoar____::96da2f590cd7246bbde0051047b0d6f7</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]}</selcriteria> <selcriteria>{"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]}</selcriteria>
</datasource> </datasource>
</datasources> </datasources>
@ -1166,87 +1166,87 @@
</subjects> </subjects>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>doajarticles::1c5bdf8fca58937894ad1441cca99b76</openaireId> <openaireId>10|doajarticles::1c5bdf8fca58937894ad1441cca99b76</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::b37a634324a45c821687e6e80e6f53b4</openaireId> <openaireId>10|doajarticles::b37a634324a45c821687e6e80e6f53b4</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::4bf64f2a104040e4e055cd9594b2d77c</openaireId> <openaireId>10|doajarticles::4bf64f2a104040e4e055cd9594b2d77c</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::479ca537c12755d1868bbf02938a900c</openaireId> <openaireId>10|doajarticles::479ca537c12755d1868bbf02938a900c</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::55f31df96a60e2309f45b7c265fcf7a2</openaireId> <openaireId>10|doajarticles::55f31df96a60e2309f45b7c265fcf7a2</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::c52a09891a5301f9986ebbfe3761810c</openaireId> <openaireId>10|doajarticles::c52a09891a5301f9986ebbfe3761810c</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::379807bc7f6c71a227ef1651462c414c</openaireId> <openaireId>10|doajarticles::379807bc7f6c71a227ef1651462c414c</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::36069db531a00b85a2e8fb301f4bdc19</openaireId> <openaireId>10|doajarticles::36069db531a00b85a2e8fb301f4bdc19</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::b6a898da311ded96fabf49c520b80d5d</openaireId> <openaireId>10|doajarticles::b6a898da311ded96fabf49c520b80d5d</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::d0753d9180b35a271d8b4a31f449749f</openaireId> <openaireId>10|doajarticles::d0753d9180b35a271d8b4a31f449749f</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::172050a92511838393a3fe237ae47e31</openaireId> <openaireId>10|doajarticles::172050a92511838393a3fe237ae47e31</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::301ed96c62abb160a3e29796efe5c95c</openaireId> <openaireId>10|doajarticles::301ed96c62abb160a3e29796efe5c95c</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::0f4f805b3d842f2c7f1b077c3426fa59</openaireId> <openaireId>10|doajarticles::0f4f805b3d842f2c7f1b077c3426fa59</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::ba73728b84437b8d48ae287b867c7215</openaireId> <openaireId>10|doajarticles::ba73728b84437b8d48ae287b867c7215</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::86faef424d804309ccf45f692523aa48</openaireId> <openaireId>10|doajarticles::86faef424d804309ccf45f692523aa48</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::73bd758fa41671de70964c3ecba013af</openaireId> <openaireId>10|doajarticles::73bd758fa41671de70964c3ecba013af</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::e661fc0bdb24af42b740a08f0ddc6cf4</openaireId> <openaireId>10|doajarticles::e661fc0bdb24af42b740a08f0ddc6cf4</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::a6d3052047d5dbfbd43d95b4afb0f3d7</openaireId> <openaireId>10|doajarticles::a6d3052047d5dbfbd43d95b4afb0f3d7</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::ca61df07089acc53a1569bde6673d82a</openaireId> <openaireId>10|doajarticles::ca61df07089acc53a1569bde6673d82a</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::237dd6f1606600459d0297abd8ed9976</openaireId> <openaireId>10|doajarticles::237dd6f1606600459d0297abd8ed9976</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::fba6191177ede7c51ea1cdf58eae7f8b</openaireId> <openaireId>10|doajarticles::fba6191177ede7c51ea1cdf58eae7f8b</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
</datasources> </datasources>
@ -1345,87 +1345,87 @@
</subjects> </subjects>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>doajarticles::c6f0ed5fa41e98863e7c73501fe4bd6d</openaireId> <openaireId>10|doajarticles::c6f0ed5fa41e98863e7c73501fe4bd6d</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::ae4c7286c79590f19fdca670156ce816</openaireId> <openaireId>10|doajarticles::ae4c7286c79590f19fdca670156ce816</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::0f664bce92ce953e0c7a92068c46bfb3</openaireId> <openaireId>10|doajarticles::0f664bce92ce953e0c7a92068c46bfb3</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::00017183dc4c858fb77541985323a4ef</openaireId> <openaireId>10|doajarticles::00017183dc4c858fb77541985323a4ef</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::93b306f458cce3d7aaaf58c0a725f4f9</openaireId> <openaireId>10|doajarticles::93b306f458cce3d7aaaf58c0a725f4f9</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::9dbf8fbf3e9fe0fe1fc01e55fbd90bfc</openaireId> <openaireId>10|doajarticles::9dbf8fbf3e9fe0fe1fc01e55fbd90bfc</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::a2bda8785c863279bba4b8f34827b4c9</openaireId> <openaireId>10|doajarticles::a2bda8785c863279bba4b8f34827b4c9</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::019a1fcb42c3fea1c1b689df76330b58</openaireId> <openaireId>10|doajarticles::019a1fcb42c3fea1c1b689df76330b58</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::0daa8281938831e9c82bfed8b55a2975</openaireId> <openaireId>10|doajarticles::0daa8281938831e9c82bfed8b55a2975</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::f67ad6d268162079b3abd51a24468744</openaireId> <openaireId>10|doajarticles::f67ad6d268162079b3abd51a24468744</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::c6f0ed5fa41e98863e7c73501fe4bd6d</openaireId> <openaireId>10|doajarticles::c6f0ed5fa41e98863e7c73501fe4bd6d</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::ad114356e196a4a3d84dda59c720dacd</openaireId> <openaireId>10|doajarticles::ad114356e196a4a3d84dda59c720dacd</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::01e8a54fdecaaf354c67a2dd74ae7d4f</openaireId> <openaireId>10|doajarticles::01e8a54fdecaaf354c67a2dd74ae7d4f</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::449305f096b10a9464449ff2d0e10e06</openaireId> <openaireId>10|doajarticles::449305f096b10a9464449ff2d0e10e06</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::982c0c0ac378256254cce2fa6572bb6c</openaireId> <openaireId>10|doajarticles::982c0c0ac378256254cce2fa6572bb6c</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::49d6ed47138884566ce93cf0ccb12c02</openaireId> <openaireId>10|doajarticles::49d6ed47138884566ce93cf0ccb12c02</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::a98e820dbc2e8ee0fc84ab66f263267c</openaireId> <openaireId>10|doajarticles::a98e820dbc2e8ee0fc84ab66f263267c</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::50b1ce37427b36368f8f0f1317e47f83</openaireId> <openaireId>10|doajarticles::50b1ce37427b36368f8f0f1317e47f83</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::f0ec29b7450b2ac5d0ad45327eeb531a</openaireId> <openaireId>10|doajarticles::f0ec29b7450b2ac5d0ad45327eeb531a</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::d8d421d3b0349a7aaa93758b27a54e84</openaireId> <openaireId>10|doajarticles::d8d421d3b0349a7aaa93758b27a54e84</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::7ffc35ac5133da01d421ccf8af5b70bc</openaireId> <openaireId>10|doajarticles::7ffc35ac5133da01d421ccf8af5b70bc</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
</datasources> </datasources>
@ -1454,81 +1454,81 @@
</subjects> </subjects>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>opendoar____::358aee4cc897452c00244351e4d91f69</openaireId> <openaireId>10|opendoar____::358aee4cc897452c00244351e4d91f69</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>re3data_____::7b0ad08687b2c960d5aeef06f811d5e6</openaireId> <openaireId>10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>driver______::bee53aa31dc2cbb538c10c2b65fa5824</openaireId> <openaireId>10|driver______::bee53aa31dc2cbb538c10c2b65fa5824</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>openaire____::437f4b072b1aa198adcbc35910ff3b98</openaireId> <openaireId>10|openaire____::437f4b072b1aa198adcbc35910ff3b98</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>openaire____::081b82f96300b6a6e3d282bad31cb6e2</openaireId> <openaireId>10|openaire____::081b82f96300b6a6e3d282bad31cb6e2</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>openaire____::9e3be59865b2c1c335d32dae2fe7b254</openaireId> <openaireId>10|openaire____::9e3be59865b2c1c335d32dae2fe7b254</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::8b6dd7db9af49e67306feb59a8bdc52c</openaireId> <openaireId>10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>share_______::4719356ec8d7d55d3feb384ce879ad6c</openaireId> <openaireId>10|share_______::4719356ec8d7d55d3feb384ce879ad6c</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>share_______::bbd802baad85d1fd440f32a7a3a2c2b1</openaireId> <openaireId>10|share_______::bbd802baad85d1fd440f32a7a3a2c2b1</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::6f4922f45568161a8cdf4ad2299f6d23</openaireId> <openaireId>10|opendoar____::6f4922f45568161a8cdf4ad2299f6d23</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>re3data_____::7980778c78fb4cf0fab13ce2159030dc</openaireId> <openaireId>10|re3data_____::7980778c78fb4cf0fab13ce2159030dc</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCov"}]}]}</selcriteria> <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCov"}]}]}</selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>re3data_____::978378def740bbf2bfb420de868c460b</openaireId> <openaireId>10|re3data_____::978378def740bbf2bfb420de868c460b</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCov"}]}]}</selcriteria> <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCov"}]}]}</selcriteria>
</datasource> </datasource>
</datasources> </datasources>

File diff suppressed because one or more lines are too long

Some files were not shown because too many files have changed in this diff Show More