1
0
Fork 0

Merge branch 'beta' into orcid_import

This commit is contained in:
Claudio Atzori 2023-12-01 12:28:14 +01:00
commit 622fafbd2e
125 changed files with 3644 additions and 761 deletions

View File

@ -135,6 +135,24 @@ public class VocabularyGroup implements Serializable {
return vocs.get(vocId.toLowerCase()).getSynonymAsQualifier(syn); return vocs.get(vocId.toLowerCase()).getSynonymAsQualifier(syn);
} }
public Qualifier lookupTermBySynonym(final String vocId, final String syn) {
return find(vocId)
.map(
vocabulary -> Optional
.ofNullable(vocabulary.getTerm(syn))
.map(
term -> OafMapperUtils
.qualifier(term.getId(), term.getName(), vocabulary.getId(), vocabulary.getName()))
.orElse(
Optional
.ofNullable(vocabulary.getTermBySynonym(syn))
.map(
term -> OafMapperUtils
.qualifier(term.getId(), term.getName(), vocabulary.getId(), vocabulary.getName()))
.orElse(null)))
.orElse(null);
}
/** /**
* getSynonymAsQualifierCaseSensitive * getSynonymAsQualifierCaseSensitive
* *

View File

@ -21,10 +21,15 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.EntityType;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import scala.Tuple2; import scala.Tuple2;
/** /**
@ -35,6 +40,12 @@ public class GroupEntitiesSparkJob {
private static final Encoder<OafEntity> OAFENTITY_KRYO_ENC = Encoders.kryo(OafEntity.class); private static final Encoder<OafEntity> OAFENTITY_KRYO_ENC = Encoders.kryo(OafEntity.class);
private ArgumentApplicationParser parser;
public GroupEntitiesSparkJob(ArgumentApplicationParser parser) {
this.parser = parser;
}
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils String jsonConfiguration = IOUtils
@ -51,6 +62,17 @@ public class GroupEntitiesSparkJob {
.orElse(Boolean.TRUE); .orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged); log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String isLookupUrl = parser.get("isLookupUrl");
log.info("isLookupUrl: {}", isLookupUrl);
final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl);
new GroupEntitiesSparkJob(parser).run(isSparkSessionManaged, isLookupService);
}
public void run(Boolean isSparkSessionManaged, ISLookUpService isLookUpService)
throws ISLookUpException {
String graphInputPath = parser.get("graphInputPath"); String graphInputPath = parser.get("graphInputPath");
log.info("graphInputPath: {}", graphInputPath); log.info("graphInputPath: {}", graphInputPath);
@ -60,19 +82,21 @@ public class GroupEntitiesSparkJob {
String outputPath = parser.get("outputPath"); String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath); log.info("outputPath: {}", outputPath);
boolean filterInvisible = Boolean.valueOf(parser.get("filterInvisible")); boolean filterInvisible = Boolean.parseBoolean(parser.get("filterInvisible"));
log.info("filterInvisible: {}", filterInvisible); log.info("filterInvisible: {}", filterInvisible);
SparkConf conf = new SparkConf(); SparkConf conf = new SparkConf();
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
conf.registerKryoClasses(ModelSupport.getOafModelClasses()); conf.registerKryoClasses(ModelSupport.getOafModelClasses());
final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookUpService);
runWithSparkSession( runWithSparkSession(
conf, conf,
isSparkSessionManaged, isSparkSessionManaged,
spark -> { spark -> {
HdfsSupport.remove(checkpointPath, spark.sparkContext().hadoopConfiguration()); HdfsSupport.remove(checkpointPath, spark.sparkContext().hadoopConfiguration());
groupEntities(spark, graphInputPath, checkpointPath, outputPath, filterInvisible); groupEntities(spark, graphInputPath, checkpointPath, outputPath, filterInvisible, vocs);
}); });
} }
@ -81,7 +105,7 @@ public class GroupEntitiesSparkJob {
String inputPath, String inputPath,
String checkpointPath, String checkpointPath,
String outputPath, String outputPath,
boolean filterInvisible) { boolean filterInvisible, VocabularyGroup vocs) {
Dataset<OafEntity> allEntities = spark.emptyDataset(OAFENTITY_KRYO_ENC); Dataset<OafEntity> allEntities = spark.emptyDataset(OAFENTITY_KRYO_ENC);
@ -106,10 +130,14 @@ public class GroupEntitiesSparkJob {
} }
Dataset<?> groupedEntities = allEntities Dataset<?> groupedEntities = allEntities
.groupByKey((MapFunction<OafEntity, String>) OafEntity::getId, Encoders.STRING())
.reduceGroups((ReduceFunction<OafEntity>) (b, a) -> OafMapperUtils.mergeEntities(b, a))
.map( .map(
(MapFunction<Tuple2<String, OafEntity>, Tuple2<String, OafEntity>>) t -> new Tuple2( (MapFunction<OafEntity, OafEntity>) entity -> GraphCleaningFunctions
.applyCoarVocabularies(entity, vocs),
OAFENTITY_KRYO_ENC)
.groupByKey((MapFunction<OafEntity, String>) OafEntity::getId, Encoders.STRING())
.reduceGroups((ReduceFunction<OafEntity>) OafMapperUtils::mergeEntities)
.map(
(MapFunction<Tuple2<String, OafEntity>, Tuple2<String, OafEntity>>) t -> new Tuple2<>(
t._2().getClass().getName(), t._2()), t._2().getClass().getName(), t._2()),
Encoders.tuple(Encoders.STRING(), OAFENTITY_KRYO_ENC)); Encoders.tuple(Encoders.STRING(), OAFENTITY_KRYO_ENC));

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.schema.oaf.utils; package eu.dnetlib.dhp.schema.oaf.utils;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import static eu.dnetlib.dhp.schema.common.ModelConstants.OPENAIRE_META_RESOURCE_TYPE;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance;
import java.net.MalformedURLException; import java.net.MalformedURLException;
@ -870,4 +872,97 @@ public class GraphCleaningFunctions extends CleaningFunctions {
return s; return s;
} }
public static OafEntity applyCoarVocabularies(OafEntity entity, VocabularyGroup vocs) {
if (entity instanceof Result) {
final Result result = (Result) entity;
Optional
.ofNullable(result.getInstance())
.ifPresent(
instances -> instances
.forEach(
instance -> {
if (Objects.isNull(instance.getInstanceTypeMapping())) {
List<InstanceTypeMapping> mapping = Lists.newArrayList();
mapping
.add(
OafMapperUtils
.instanceTypeMapping(
instance.getInstancetype().getClassname(),
OPENAIRE_COAR_RESOURCE_TYPES_3_1));
instance.setInstanceTypeMapping(mapping);
}
Optional<InstanceTypeMapping> optionalItm = instance
.getInstanceTypeMapping()
.stream()
.filter(GraphCleaningFunctions::originalResourceType)
.findFirst();
if (optionalItm.isPresent()) {
InstanceTypeMapping coarItm = optionalItm.get();
Optional
.ofNullable(
vocs
.lookupTermBySynonym(
OPENAIRE_COAR_RESOURCE_TYPES_3_1, coarItm.getOriginalType()))
.ifPresent(type -> {
coarItm.setTypeCode(type.getClassid());
coarItm.setTypeLabel(type.getClassname());
});
final List<InstanceTypeMapping> mappings = Lists.newArrayList();
if (vocs.vocabularyExists(OPENAIRE_USER_RESOURCE_TYPES)) {
Optional
.ofNullable(
vocs
.lookupTermBySynonym(
OPENAIRE_USER_RESOURCE_TYPES, coarItm.getTypeCode()))
.ifPresent(
type -> mappings
.add(
OafMapperUtils
.instanceTypeMapping(coarItm.getTypeCode(), type)));
}
if (!mappings.isEmpty()) {
instance.getInstanceTypeMapping().addAll(mappings);
}
}
}));
result.setMetaResourceType(getMetaResourceType(result.getInstance(), vocs));
}
return entity;
}
private static boolean originalResourceType(InstanceTypeMapping itm) {
return StringUtils.isNotBlank(itm.getOriginalType()) &&
OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(itm.getVocabularyName()) &&
StringUtils.isBlank(itm.getTypeCode()) &&
StringUtils.isBlank(itm.getTypeLabel());
}
private static Qualifier getMetaResourceType(final List<Instance> instances, final VocabularyGroup vocs) {
if (vocs.vocabularyExists(OPENAIRE_META_RESOURCE_TYPE)) {
Optional<InstanceTypeMapping> instanceTypeMapping = instances
.stream()
.flatMap(
i -> Optional.ofNullable(i.getInstanceTypeMapping()).map(Collection::stream).orElse(Stream.empty()))
.filter(t -> OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(t.getVocabularyName()))
.findFirst();
if (!instanceTypeMapping.isPresent()) {
return null;
} else {
final String typeCode = instanceTypeMapping.get().getTypeCode();
return Optional
.ofNullable(vocs.lookupTermBySynonym(OPENAIRE_META_RESOURCE_TYPE, typeCode))
.orElseThrow(
() -> new IllegalStateException("unable to find a synonym for '" + typeCode + "' in " +
OPENAIRE_META_RESOURCE_TYPE));
}
} else {
throw new IllegalStateException("vocabulary '" + OPENAIRE_META_RESOURCE_TYPE + "' not available");
}
}
} }

View File

@ -14,7 +14,6 @@ import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.schema.common.AccessRightComparator; import eu.dnetlib.dhp.schema.common.AccessRightComparator;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
@ -141,6 +140,28 @@ public class OafMapperUtils {
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
public static InstanceTypeMapping instanceTypeMapping(String originalType, String code, String label,
String vocabularyName) {
final InstanceTypeMapping m = new InstanceTypeMapping();
m.setVocabularyName(vocabularyName);
m.setOriginalType(originalType);
m.setTypeCode(code);
m.setTypeLabel(label);
return m;
}
public static InstanceTypeMapping instanceTypeMapping(String originalType, Qualifier term) {
return instanceTypeMapping(originalType, term.getClassid(), term.getClassname(), term.getSchemeid());
}
public static InstanceTypeMapping instanceTypeMapping(String originalType) {
return instanceTypeMapping(originalType, null, null, null);
}
public static InstanceTypeMapping instanceTypeMapping(String originalType, String vocabularyName) {
return instanceTypeMapping(originalType, null, null, vocabularyName);
}
public static Qualifier unknown(final String schemeid, final String schemename) { public static Qualifier unknown(final String schemeid, final String schemename) {
return qualifier(UNKNOWN, "Unknown", schemeid, schemename); return qualifier(UNKNOWN, "Unknown", schemeid, schemename);
} }

View File

@ -28,5 +28,11 @@
"paramLongName": "filterInvisible", "paramLongName": "filterInvisible",
"paramDescription": "if true filters out invisible entities", "paramDescription": "if true filters out invisible entities",
"paramRequired": true "paramRequired": true
},
{
"paramName": "isu",
"paramLongName": "isLookupUrl",
"paramDescription": "url to the ISLookup Service",
"paramRequired": true
} }
] ]

View File

@ -166,7 +166,7 @@ object DataciteToOAFTransformation {
resourceTypeGeneral: String, resourceTypeGeneral: String,
schemaOrg: String, schemaOrg: String,
vocabularies: VocabularyGroup vocabularies: VocabularyGroup
): (Qualifier, Qualifier) = { ): (Qualifier, Qualifier, String) = {
if (resourceType != null && resourceType.nonEmpty) { if (resourceType != null && resourceType.nonEmpty) {
val typeQualifier = val typeQualifier =
vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType) vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType)
@ -176,7 +176,7 @@ object DataciteToOAFTransformation {
vocabularies.getSynonymAsQualifier( vocabularies.getSynonymAsQualifier(
ModelConstants.DNET_RESULT_TYPOLOGIES, ModelConstants.DNET_RESULT_TYPOLOGIES,
typeQualifier.getClassid typeQualifier.getClassid
) ), resourceType
) )
} }
if (schemaOrg != null && schemaOrg.nonEmpty) { if (schemaOrg != null && schemaOrg.nonEmpty) {
@ -188,7 +188,7 @@ object DataciteToOAFTransformation {
vocabularies.getSynonymAsQualifier( vocabularies.getSynonymAsQualifier(
ModelConstants.DNET_RESULT_TYPOLOGIES, ModelConstants.DNET_RESULT_TYPOLOGIES,
typeQualifier.getClassid typeQualifier.getClassid
) ), schemaOrg
) )
} }
@ -203,7 +203,7 @@ object DataciteToOAFTransformation {
vocabularies.getSynonymAsQualifier( vocabularies.getSynonymAsQualifier(
ModelConstants.DNET_RESULT_TYPOLOGIES, ModelConstants.DNET_RESULT_TYPOLOGIES,
typeQualifier.getClassid typeQualifier.getClassid
) ), resourceTypeGeneral
) )
} }
@ -216,12 +216,19 @@ object DataciteToOAFTransformation {
schemaOrg: String, schemaOrg: String,
vocabularies: VocabularyGroup vocabularies: VocabularyGroup
): Result = { ): Result = {
val typeQualifiers: (Qualifier, Qualifier) = val typeQualifiers: (Qualifier, Qualifier, String) =
getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies) getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies)
if (typeQualifiers == null) if (typeQualifiers == null)
return null return null
val i = new Instance val i = new Instance
i.setInstancetype(typeQualifiers._1) i.setInstancetype(typeQualifiers._1)
// ADD ORIGINAL TYPE
val itm = new InstanceTypeMapping
itm.setOriginalType(typeQualifiers._3)
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
i.setInstanceTypeMapping(List(itm).asJava)
typeQualifiers._2.getClassname match { typeQualifiers._2.getClassname match {
case "dataset" => case "dataset" =>
val r = new OafDataset val r = new OafDataset

View File

@ -176,7 +176,7 @@ object BioDBToOAF {
i.setUrl(List(s"${resolvedURL(input.pidType)}${input.pid}").asJava) i.setUrl(List(s"${resolvedURL(input.pidType)}${input.pid}").asJava)
} }
if (input.pidType.equalsIgnoreCase("clinicaltrials.gov")) if (input.pidType.equalsIgnoreCase("clinicaltrials.gov")) {
i.setInstancetype( i.setInstancetype(
OafMapperUtils.qualifier( OafMapperUtils.qualifier(
"0037", "0037",
@ -185,7 +185,11 @@ object BioDBToOAF {
ModelConstants.DNET_PUBLICATION_RESOURCE ModelConstants.DNET_PUBLICATION_RESOURCE
) )
) )
else val itm = new InstanceTypeMapping
itm.setOriginalType(input.pidType)
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
i.setInstanceTypeMapping(List(itm).asJava)
} else {
i.setInstancetype( i.setInstancetype(
OafMapperUtils.qualifier( OafMapperUtils.qualifier(
"0046", "0046",
@ -194,6 +198,11 @@ object BioDBToOAF {
ModelConstants.DNET_PUBLICATION_RESOURCE ModelConstants.DNET_PUBLICATION_RESOURCE
) )
) )
val itm = new InstanceTypeMapping
itm.setOriginalType("Bioentity")
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
i.setInstanceTypeMapping(List(itm).asJava)
}
if (input.datasource == null || input.datasource.isEmpty) if (input.datasource == null || input.datasource.isEmpty)
return null return null
@ -265,6 +274,10 @@ object BioDBToOAF {
ModelConstants.DNET_PUBLICATION_RESOURCE ModelConstants.DNET_PUBLICATION_RESOURCE
) )
) )
val itm = new InstanceTypeMapping
itm.setOriginalType("Bioentity")
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
i.setInstanceTypeMapping(List(itm).asJava)
i.setCollectedfrom(collectedFromMap("uniprot")) i.setCollectedfrom(collectedFromMap("uniprot"))
d.setInstance(List(i).asJava) d.setInstance(List(i).asJava)
@ -471,6 +484,10 @@ object BioDBToOAF {
ModelConstants.DNET_PUBLICATION_RESOURCE ModelConstants.DNET_PUBLICATION_RESOURCE
) )
) )
val itm = new InstanceTypeMapping
itm.setOriginalType("Bioentity")
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
i.setInstanceTypeMapping(List(itm).asJava)
i.setCollectedfrom(collectedFromMap("pdb")) i.setCollectedfrom(collectedFromMap("pdb"))
d.setInstance(List(i).asJava) d.setInstance(List(i).asJava)
@ -571,6 +588,11 @@ object BioDBToOAF {
ModelConstants.DNET_PUBLICATION_RESOURCE ModelConstants.DNET_PUBLICATION_RESOURCE
) )
) )
val itm = new InstanceTypeMapping
itm.setOriginalType("Bioentity")
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
i.setInstanceTypeMapping(List(itm).asJava)
i.setCollectedfrom(collectedFromMap("ebi")) i.setCollectedfrom(collectedFromMap("ebi"))
d.setInstance(List(i).asJava) d.setInstance(List(i).asJava)

View File

@ -188,12 +188,24 @@ object PubMedToOaf {
val cojbCategory = val cojbCategory =
getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, ja.get.getValue) getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, ja.get.getValue)
pubmedInstance.setInstancetype(cojbCategory) pubmedInstance.setInstancetype(cojbCategory)
// ADD ORIGINAL TYPE to the publication
val itm = new InstanceTypeMapping
itm.setOriginalType(ja.get.getValue)
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
pubmedInstance.setInstanceTypeMapping(List(itm).asJava)
} else { } else {
val i_type = article.getPublicationTypes.asScala val i_type = article.getPublicationTypes.asScala
.map(s => getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)) .map(s => (s.getValue,getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)))
.find(q => q != null) .find(q => q._2 != null)
if (i_type.isDefined)
pubmedInstance.setInstancetype(i_type.get) if (i_type.isDefined) {
pubmedInstance.setInstancetype(i_type.get._2)
// ADD ORIGINAL TYPE to the publication
val itm = new InstanceTypeMapping
itm.setOriginalType(i_type.get._1)
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
pubmedInstance.setInstanceTypeMapping(List(itm).asJava)
}
else else
return null return null
} }

View File

@ -107,7 +107,7 @@ case object Crossref2Oaf {
.map(f => f.id) .map(f => f.id)
} }
def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = { def mappingResult(result: Result, json: JValue, cobjCategory: String, originalType:String): Result = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
//MAPPING Crossref DOI into PID //MAPPING Crossref DOI into PID
@ -283,6 +283,11 @@ case object Crossref2Oaf {
ModelConstants.DNET_PUBLICATION_RESOURCE ModelConstants.DNET_PUBLICATION_RESOURCE
) )
) )
//ADD ORIGINAL TYPE to the mapping
val itm = new InstanceTypeMapping
itm.setOriginalType(originalType)
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
instance.setInstanceTypeMapping(List(itm).asJava)
result.setResourcetype( result.setResourcetype(
OafMapperUtils.qualifier( OafMapperUtils.qualifier(
cobjCategory.substring(0, 4), cobjCategory.substring(0, 4),
@ -367,7 +372,7 @@ case object Crossref2Oaf {
objectType, objectType,
mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type") mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type")
) )
mappingResult(result, json, cOBJCategory) mappingResult(result, json, cOBJCategory, originalType)
if (result == null || result.getId == null) if (result == null || result.getId == null)
return List() return List()

View File

@ -71,6 +71,9 @@ public class PropagationConstant {
public static final String PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_ID = "result:community:organization"; public static final String PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_ID = "result:community:organization";
public static final String PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME = " Propagation of result belonging to community through organization"; public static final String PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME = " Propagation of result belonging to community through organization";
public static final String PROPAGATION_RESULT_COMMUNITY_PROJECT_CLASS_ID = "result:community:project";
public static final String PROPAGATION_RESULT_COMMUNITY_PROJECT_CLASS_NAME = " Propagation of result belonging to community through project";
public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID = "authorpid:result"; public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID = "authorpid:result";
public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME = "Propagation of authors pid to result through semantic relations"; public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME = "Propagation of authors pid to result through semantic relations";

View File

@ -0,0 +1,80 @@
package eu.dnetlib.dhp.api;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import org.jetbrains.annotations.NotNull;
/**
* @author miriam.baglioni
* @Date 06/10/23
*/
public class QueryCommunityAPI {
private static String get(String geturl) throws IOException {
URL url = new URL(geturl);
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setDoOutput(true);
conn.setRequestMethod("GET");
int responseCode = conn.getResponseCode();
String body = getBody(conn);
conn.disconnect();
if (responseCode != HttpURLConnection.HTTP_OK)
throw new IOException("Unexpected code " + responseCode + body);
return body;
}
public static String communities(String baseURL) throws IOException {
return get(baseURL + "communities");
}
public static String community(String id, String baseURL) throws IOException {
return get(baseURL + id);
}
public static String communityDatasource(String id, String baseURL) throws IOException {
return get(baseURL + id + "/contentproviders");
}
public static String communityPropagationOrganization(String id, String baseURL) throws IOException {
return get(baseURL + id + "/propagationOrganizations");
}
public static String communityProjects(String id, String page, String size, String baseURL) throws IOException {
return get(baseURL + id + "/projects/" + page + "/" + size);
}
@NotNull
private static String getBody(HttpURLConnection conn) throws IOException {
String body = "{}";
try (BufferedReader br = new BufferedReader(
new InputStreamReader(conn.getInputStream(), "utf-8"))) {
StringBuilder response = new StringBuilder();
String responseLine = null;
while ((responseLine = br.readLine()) != null) {
response.append(responseLine.trim());
}
body = response.toString();
}
return body;
}
}

View File

@ -0,0 +1,170 @@
package eu.dnetlib.dhp.api;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
import javax.management.Query;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.amazonaws.util.StringUtils;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.api.model.*;
import eu.dnetlib.dhp.bulktag.community.Community;
import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
import eu.dnetlib.dhp.bulktag.community.Provider;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
import eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob;
/**
* @author miriam.baglioni
* @Date 09/10/23
*/
public class Utils implements Serializable {
private static final ObjectMapper MAPPER = new ObjectMapper();
private static final VerbResolver resolver = VerbResolverFactory.newInstance();
private static final Logger log = LoggerFactory.getLogger(Utils.class);
public static CommunityConfiguration getCommunityConfiguration(String baseURL) throws IOException {
final Map<String, Community> communities = Maps.newHashMap();
List<Community> validCommunities = new ArrayList<>();
getValidCommunities(baseURL)
.forEach(community -> {
try {
CommunityModel cm = MAPPER
.readValue(QueryCommunityAPI.community(community.getId(), baseURL), CommunityModel.class);
validCommunities.add(getCommunity(cm));
} catch (IOException e) {
throw new RuntimeException(e);
}
});
validCommunities.forEach(community -> {
try {
DatasourceList dl = MAPPER
.readValue(
QueryCommunityAPI.communityDatasource(community.getId(), baseURL), DatasourceList.class);
community.setProviders(dl.stream().map(d -> {
if (d.getEnabled() == null || Boolean.FALSE.equals(d.getEnabled()))
return null;
Provider p = new Provider();
p.setOpenaireId("10|" + d.getOpenaireId());
p.setSelectionConstraints(d.getSelectioncriteria());
if (p.getSelectionConstraints() != null)
p.getSelectionConstraints().setSelection(resolver);
return p;
})
.filter(Objects::nonNull)
.collect(Collectors.toList()));
} catch (IOException e) {
throw new RuntimeException(e);
}
});
validCommunities.forEach(community -> {
if (community.isValid())
communities.put(community.getId(), community);
});
return new CommunityConfiguration(communities);
}
private static Community getCommunity(CommunityModel cm) {
Community c = new Community();
c.setId(cm.getId());
c.setZenodoCommunities(cm.getOtherZenodoCommunities());
if (!StringUtils.isNullOrEmpty(cm.getZenodoCommunity()))
c.getZenodoCommunities().add(cm.getZenodoCommunity());
c.setSubjects(cm.getSubjects());
c.getSubjects().addAll(cm.getFos());
c.getSubjects().addAll(cm.getSdg());
if (cm.getAdvancedConstraints() != null) {
c.setConstraints(cm.getAdvancedConstraints());
c.getConstraints().setSelection(resolver);
}
if (cm.getRemoveConstraints() != null) {
c.setRemoveConstraints(cm.getRemoveConstraints());
c.getRemoveConstraints().setSelection(resolver);
}
return c;
}
public static List<CommunityModel> getValidCommunities(String baseURL) throws IOException {
return MAPPER
.readValue(QueryCommunityAPI.communities(baseURL), CommunitySummary.class)
.stream()
.filter(
community -> !community.getStatus().equals("hidden") &&
(community.getType().equals("ri") || community.getType().equals("community")))
.collect(Collectors.toList());
}
/**
* it returns for each organization the list of associated communities
*/
public static CommunityEntityMap getCommunityOrganization(String baseURL) throws IOException {
CommunityEntityMap organizationMap = new CommunityEntityMap();
getValidCommunities(baseURL)
.forEach(community -> {
String id = community.getId();
try {
List<String> associatedOrgs = MAPPER
.readValue(
QueryCommunityAPI.communityPropagationOrganization(id, baseURL), OrganizationList.class);
associatedOrgs.forEach(o -> {
if (!organizationMap
.keySet()
.contains(
"20|" + o))
organizationMap.put("20|" + o, new ArrayList<>());
organizationMap.get("20|" + o).add(community.getId());
});
} catch (IOException e) {
throw new RuntimeException(e);
}
});
return organizationMap;
}
public static CommunityEntityMap getCommunityProjects(String baseURL) throws IOException {
CommunityEntityMap projectMap = new CommunityEntityMap();
getValidCommunities(baseURL)
.forEach(community -> {
int page = -1;
int size = 100;
ContentModel cm = new ContentModel();
do {
page++;
try {
cm = MAPPER
.readValue(
QueryCommunityAPI
.communityProjects(
community.getId(), String.valueOf(page), String.valueOf(size), baseURL),
ContentModel.class);
if (cm.getContent().size() > 0) {
cm.getContent().forEach(p -> {
if (!projectMap.keySet().contains("40|" + p.getOpenaireId()))
projectMap.put("40|" + p.getOpenaireId(), new ArrayList<>());
projectMap.get("40|" + p.getOpenaireId()).add(community.getId());
});
}
} catch (IOException e) {
throw new RuntimeException(e);
}
} while (!cm.getLast());
});
return projectMap;
}
}

View File

@ -0,0 +1,43 @@
package eu.dnetlib.dhp.api.model;
import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.google.gson.Gson;
import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
@JsonAutoDetect
@JsonIgnoreProperties(ignoreUnknown = true)
public class CommunityContentprovider {
private String openaireId;
private SelectionConstraints selectioncriteria;
private String enabled;
public String getEnabled() {
return enabled;
}
public void setEnabled(String enabled) {
this.enabled = enabled;
}
public String getOpenaireId() {
return openaireId;
}
public void setOpenaireId(final String openaireId) {
this.openaireId = openaireId;
}
public SelectionConstraints getSelectioncriteria() {
return this.selectioncriteria;
}
public void setSelectioncriteria(SelectionConstraints selectioncriteria) {
this.selectioncriteria = selectioncriteria;
}
}

View File

@ -1,13 +1,13 @@
package eu.dnetlib.dhp.resulttocommunityfromorganization; package eu.dnetlib.dhp.api.model;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
public class OrganizationMap extends HashMap<String, List<String>> { public class CommunityEntityMap extends HashMap<String, List<String>> {
public OrganizationMap() { public CommunityEntityMap() {
super(); super();
} }

View File

@ -0,0 +1,108 @@
package eu.dnetlib.dhp.api.model;
import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
/**
* @author miriam.baglioni
* @Date 06/10/23
*/
@JsonIgnoreProperties(ignoreUnknown = true)
public class CommunityModel implements Serializable {
private String id;
private String type;
private String status;
private String zenodoCommunity;
private List<String> subjects;
private List<String> otherZenodoCommunities;
private List<String> fos;
private List<String> sdg;
private SelectionConstraints advancedConstraints;
private SelectionConstraints removeConstraints;
public String getZenodoCommunity() {
return zenodoCommunity;
}
public void setZenodoCommunity(String zenodoCommunity) {
this.zenodoCommunity = zenodoCommunity;
}
public List<String> getSubjects() {
return subjects;
}
public void setSubjects(List<String> subjects) {
this.subjects = subjects;
}
public List<String> getOtherZenodoCommunities() {
return otherZenodoCommunities;
}
public void setOtherZenodoCommunities(List<String> otherZenodoCommunities) {
this.otherZenodoCommunities = otherZenodoCommunities;
}
public List<String> getFos() {
return fos;
}
public void setFos(List<String> fos) {
this.fos = fos;
}
public List<String> getSdg() {
return sdg;
}
public void setSdg(List<String> sdg) {
this.sdg = sdg;
}
public SelectionConstraints getRemoveConstraints() {
return removeConstraints;
}
public void setRemoveConstraints(SelectionConstraints removeConstraints) {
this.removeConstraints = removeConstraints;
}
public SelectionConstraints getAdvancedConstraints() {
return advancedConstraints;
}
public void setAdvancedConstraints(SelectionConstraints advancedConstraints) {
this.advancedConstraints = advancedConstraints;
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
public String getStatus() {
return status;
}
public void setStatus(String status) {
this.status = status;
}
}

View File

@ -0,0 +1,15 @@
package eu.dnetlib.dhp.api.model;
import java.io.Serializable;
import java.util.ArrayList;
/**
* @author miriam.baglioni
* @Date 06/10/23
*/
public class CommunitySummary extends ArrayList<CommunityModel> implements Serializable {
public CommunitySummary() {
super();
}
}

View File

@ -0,0 +1,51 @@
package eu.dnetlib.dhp.api.model;
import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
/**
* @author miriam.baglioni
* @Date 09/10/23
*/
@JsonIgnoreProperties(ignoreUnknown = true)
public class ContentModel implements Serializable {
private List<ProjectModel> content;
private Integer totalPages;
private Boolean last;
private Integer number;
public List<ProjectModel> getContent() {
return content;
}
public void setContent(List<ProjectModel> content) {
this.content = content;
}
public Integer getTotalPages() {
return totalPages;
}
public void setTotalPages(Integer totalPages) {
this.totalPages = totalPages;
}
public Boolean getLast() {
return last;
}
public void setLast(Boolean last) {
this.last = last;
}
public Integer getNumber() {
return number;
}
public void setNumber(Integer number) {
this.number = number;
}
}

View File

@ -0,0 +1,13 @@
package eu.dnetlib.dhp.api.model;
import java.io.Serializable;
import java.util.ArrayList;
import eu.dnetlib.dhp.api.model.CommunityContentprovider;
public class DatasourceList extends ArrayList<CommunityContentprovider> implements Serializable {
public DatasourceList() {
super();
}
}

View File

@ -0,0 +1,16 @@
package eu.dnetlib.dhp.api.model;
import java.io.Serializable;
import java.util.ArrayList;
/**
* @author miriam.baglioni
* @Date 09/10/23
*/
public class OrganizationList extends ArrayList<String> implements Serializable {
public OrganizationList() {
super();
}
}

View File

@ -0,0 +1,24 @@
package eu.dnetlib.dhp.api.model;
import java.io.Serializable;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
/**
* @author miriam.baglioni
* @Date 09/10/23
*/
@JsonIgnoreProperties(ignoreUnknown = true)
public class ProjectModel implements Serializable {
private String openaireId;
public String getOpenaireId() {
return openaireId;
}
public void setOpenaireId(String openaireId) {
this.openaireId = openaireId;
}
}

View File

@ -9,7 +9,6 @@ import java.util.*;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
@ -21,8 +20,11 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson; import com.google.gson.Gson;
import eu.dnetlib.dhp.api.Utils;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.bulktag.community.*; import eu.dnetlib.dhp.bulktag.community.*;
import eu.dnetlib.dhp.schema.common.EntityType;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
@ -54,50 +56,39 @@ public class SparkBulkTagJob {
.orElse(Boolean.TRUE); .orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged); log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
Boolean isTest = Optional
.ofNullable(parser.get("isTest"))
.map(Boolean::valueOf)
.orElse(Boolean.FALSE);
log.info("isTest: {} ", isTest);
final String inputPath = parser.get("sourcePath"); final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath); log.info("inputPath: {}", inputPath);
final String outputPath = parser.get("outputPath"); final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath); log.info("outputPath: {}", outputPath);
final String baseURL = parser.get("baseURL");
log.info("baseURL: {}", baseURL);
ProtoMap protoMappingParams = new Gson().fromJson(parser.get("pathMap"), ProtoMap.class); ProtoMap protoMappingParams = new Gson().fromJson(parser.get("pathMap"), ProtoMap.class);
log.info("pathMap: {}", new Gson().toJson(protoMappingParams)); log.info("pathMap: {}", new Gson().toJson(protoMappingParams));
final String resultClassName = parser.get("resultTableName");
log.info("resultTableName: {}", resultClassName);
final Boolean saveGraph = Optional
.ofNullable(parser.get("saveGraph"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("saveGraph: {}", saveGraph);
Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
SparkConf conf = new SparkConf(); SparkConf conf = new SparkConf();
CommunityConfiguration cc; CommunityConfiguration cc;
String taggingConf = parser.get("taggingConf"); String taggingConf = Optional
.ofNullable(parser.get("taggingConf"))
.map(String::valueOf)
.orElse(null);
if (isTest) { if (taggingConf != null) {
cc = CommunityConfigurationFactory.newInstance(taggingConf); cc = CommunityConfigurationFactory.newInstance(taggingConf);
} else { } else {
cc = QueryInformationSystem.getCommunityConfiguration(parser.get("isLookUpUrl")); cc = Utils.getCommunityConfiguration(baseURL);
log.info(OBJECT_MAPPER.writeValueAsString(cc));
} }
runWithSparkSession( runWithSparkSession(
conf, conf,
isSparkSessionManaged, isSparkSessionManaged,
spark -> { spark -> {
removeOutputDir(spark, outputPath);
extendCommunityConfigurationForEOSC(spark, inputPath, cc); extendCommunityConfigurationForEOSC(spark, inputPath, cc);
execBulkTag(spark, inputPath, outputPath, protoMappingParams, resultClazz, cc); execBulkTag(spark, inputPath, outputPath, protoMappingParams, cc);
}); });
} }
@ -106,10 +97,7 @@ public class SparkBulkTagJob {
Dataset<String> datasources = readPath( Dataset<String> datasources = readPath(
spark, inputPath spark, inputPath
.substring( + "datasource",
0,
inputPath.lastIndexOf("/"))
+ "/datasource",
Datasource.class) Datasource.class)
.filter((FilterFunction<Datasource>) ds -> isOKDatasource(ds)) .filter((FilterFunction<Datasource>) ds -> isOKDatasource(ds))
.map((MapFunction<Datasource, String>) ds -> ds.getId(), Encoders.STRING()); .map((MapFunction<Datasource, String>) ds -> ds.getId(), Encoders.STRING());
@ -117,10 +105,10 @@ public class SparkBulkTagJob {
Map<String, List<Pair<String, SelectionConstraints>>> dsm = cc.getEoscDatasourceMap(); Map<String, List<Pair<String, SelectionConstraints>>> dsm = cc.getEoscDatasourceMap();
for (String ds : datasources.collectAsList()) { for (String ds : datasources.collectAsList()) {
final String dsId = ds.substring(3); // final String dsId = ds.substring(3);
if (!dsm.containsKey(dsId)) { if (!dsm.containsKey(ds)) {
ArrayList<Pair<String, SelectionConstraints>> eoscList = new ArrayList<>(); ArrayList<Pair<String, SelectionConstraints>> eoscList = new ArrayList<>();
dsm.put(dsId, eoscList); dsm.put(ds, eoscList);
} }
} }
@ -142,11 +130,17 @@ public class SparkBulkTagJob {
String inputPath, String inputPath,
String outputPath, String outputPath,
ProtoMap protoMappingParams, ProtoMap protoMappingParams,
Class<R> resultClazz,
CommunityConfiguration communityConfiguration) { CommunityConfiguration communityConfiguration) {
ModelSupport.entityTypes
.keySet()
.parallelStream()
.filter(e -> ModelSupport.isResult(e))
.forEach(e -> {
removeOutputDir(spark, outputPath + e.name());
ResultTagger resultTagger = new ResultTagger(); ResultTagger resultTagger = new ResultTagger();
readPath(spark, inputPath, resultClazz) Class<R> resultClazz = ModelSupport.entityTypes.get(e);
readPath(spark, inputPath + e.name(), resultClazz)
.map(patchResult(), Encoders.bean(resultClazz)) .map(patchResult(), Encoders.bean(resultClazz))
.filter(Objects::nonNull) .filter(Objects::nonNull)
.map( .map(
@ -157,7 +151,9 @@ public class SparkBulkTagJob {
.write() .write()
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.option("compression", "gzip") .option("compression", "gzip")
.json(outputPath); .json(outputPath + e.name());
});
} }
public static <R> Dataset<R> readPath( public static <R> Dataset<R> readPath(

View File

@ -4,6 +4,7 @@ package eu.dnetlib.dhp.bulktag.community;
import java.io.Serializable; import java.io.Serializable;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Optional;
import com.google.gson.Gson; import com.google.gson.Gson;
@ -13,7 +14,7 @@ public class Community implements Serializable {
private String id; private String id;
private List<String> subjects = new ArrayList<>(); private List<String> subjects = new ArrayList<>();
private List<Provider> providers = new ArrayList<>(); private List<Provider> providers = new ArrayList<>();
private List<ZenodoCommunity> zenodoCommunities = new ArrayList<>(); private List<String> zenodoCommunities = new ArrayList<>();
private SelectionConstraints constraints = new SelectionConstraints(); private SelectionConstraints constraints = new SelectionConstraints();
private SelectionConstraints removeConstraints = new SelectionConstraints(); private SelectionConstraints removeConstraints = new SelectionConstraints();
@ -26,7 +27,7 @@ public class Community implements Serializable {
return !getSubjects().isEmpty() return !getSubjects().isEmpty()
|| !getProviders().isEmpty() || !getProviders().isEmpty()
|| !getZenodoCommunities().isEmpty() || !getZenodoCommunities().isEmpty()
|| getConstraints().getCriteria() != null; || (Optional.ofNullable(getConstraints()).isPresent() && getConstraints().getCriteria() != null);
} }
public String getId() { public String getId() {
@ -53,11 +54,11 @@ public class Community implements Serializable {
this.providers = providers; this.providers = providers;
} }
public List<ZenodoCommunity> getZenodoCommunities() { public List<String> getZenodoCommunities() {
return zenodoCommunities; return zenodoCommunities;
} }
public void setZenodoCommunities(List<ZenodoCommunity> zenodoCommunities) { public void setZenodoCommunities(List<String> zenodoCommunities) {
this.zenodoCommunities = zenodoCommunities; this.zenodoCommunities = zenodoCommunities;
} }

View File

@ -81,7 +81,7 @@ public class CommunityConfiguration implements Serializable {
this.removeConstraintsMap = removeConstraintsMap; this.removeConstraintsMap = removeConstraintsMap;
} }
CommunityConfiguration(final Map<String, Community> communities) { public CommunityConfiguration(final Map<String, Community> communities) {
this.communities = communities; this.communities = communities;
init(); init();
} }
@ -117,10 +117,10 @@ public class CommunityConfiguration implements Serializable {
add(d.getOpenaireId(), new Pair<>(id, d.getSelectionConstraints()), datasourceMap); add(d.getOpenaireId(), new Pair<>(id, d.getSelectionConstraints()), datasourceMap);
} }
// get zenodo communities // get zenodo communities
for (ZenodoCommunity zc : c.getZenodoCommunities()) { for (String zc : c.getZenodoCommunities()) {
add( add(
zc.getZenodoCommunityId(), zc,
new Pair<>(id, zc.getSelCriteria()), new Pair<>(id, null),
zenodocommunityMap); zenodocommunityMap);
} }
selectionConstraintsMap.put(id, c.getConstraints()); selectionConstraintsMap.put(id, c.getConstraints());

View File

@ -143,16 +143,16 @@ public class CommunityConfigurationFactory {
return providerList; return providerList;
} }
private static List<ZenodoCommunity> parseZenodoCommunities(final Node node) { private static List<String> parseZenodoCommunities(final Node node) {
final List<Node> list = node.selectNodes("./zenodocommunities/zenodocommunity"); final List<Node> list = node.selectNodes("./zenodocommunities/zenodocommunity");
final List<ZenodoCommunity> zenodoCommunityList = new ArrayList<>(); final List<String> zenodoCommunityList = new ArrayList<>();
for (Node n : list) { for (Node n : list) {
ZenodoCommunity zc = new ZenodoCommunity(); // ZenodoCommunity zc = new ZenodoCommunity();
zc.setZenodoCommunityId(n.selectSingleNode("./zenodoid").getText()); // zc.setZenodoCommunityId(n.selectSingleNode("./zenodoid").getText());
zc.setSelCriteria(n.selectSingleNode("./selcriteria")); // zc.setSelCriteria(n.selectSingleNode("./selcriteria"));
zenodoCommunityList.add(zc); zenodoCommunityList.add(n.selectSingleNode("./zenodoid").getText());
} }
log.info("size of the zenodo community list " + zenodoCommunityList.size()); log.info("size of the zenodo community list " + zenodoCommunityList.size());

View File

@ -4,6 +4,8 @@ package eu.dnetlib.dhp.bulktag.community;
import java.io.Serializable; import java.io.Serializable;
import java.lang.reflect.InvocationTargetException; import java.lang.reflect.InvocationTargetException;
import org.apache.htrace.fasterxml.jackson.annotation.JsonIgnore;
import eu.dnetlib.dhp.bulktag.criteria.Selection; import eu.dnetlib.dhp.bulktag.criteria.Selection;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
@ -12,6 +14,7 @@ public class Constraint implements Serializable {
private String field; private String field;
private String value; private String value;
// private String element; // private String element;
@JsonIgnore
private Selection selection; private Selection selection;
public String getVerb() { public String getVerb() {
@ -38,10 +41,11 @@ public class Constraint implements Serializable {
this.value = value; this.value = value;
} }
public void setSelection(Selection sel) { //@JsonIgnore
selection = sel; // public void setSelection(Selection sel) {
} // selection = sel;
// }
@JsonIgnore
public void setSelection(VerbResolver resolver) public void setSelection(VerbResolver resolver)
throws InvocationTargetException, NoSuchMethodException, InstantiationException, throws InvocationTargetException, NoSuchMethodException, InstantiationException,
IllegalAccessException { IllegalAccessException {
@ -52,11 +56,4 @@ public class Constraint implements Serializable {
return selection.apply(metadata); return selection.apply(metadata);
} }
// public String getElement() {
// return element;
// }
//
// public void setElement(String element) {
// this.element = element;
// }
} }

View File

@ -1,34 +0,0 @@
package eu.dnetlib.dhp.bulktag.community;
import java.io.IOException;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.dom4j.DocumentException;
import org.xml.sax.SAXException;
import com.google.common.base.Joiner;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
public class QueryInformationSystem {
public static CommunityConfiguration getCommunityConfiguration(final String isLookupUrl)
throws ISLookUpException, DocumentException, SAXException, IOException {
ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
final List<String> res = isLookUp
.quickSearchProfile(
IOUtils
.toString(
QueryInformationSystem.class
.getResourceAsStream(
"/eu/dnetlib/dhp/bulktag/query.xq")));
final String xmlConf = "<communities>" + Joiner.on(" ").join(res) + "</communities>";
return CommunityConfigurationFactory.newInstance(xmlConf);
}
}

View File

@ -82,11 +82,15 @@ public class ResultTagger implements Serializable {
// communities contains all the communities to be not added to the context // communities contains all the communities to be not added to the context
final Set<String> removeCommunities = new HashSet<>(); final Set<String> removeCommunities = new HashSet<>();
// if (conf.getRemoveConstraintsMap().keySet().size() > 0)
conf conf
.getRemoveConstraintsMap() .getRemoveConstraintsMap()
.keySet() .keySet()
.forEach(communityId -> { .forEach(
if (conf.getRemoveConstraintsMap().get(communityId).getCriteria() != null && communityId -> {
// log.info("Remove constraints for " + communityId);
if (conf.getRemoveConstraintsMap().keySet().contains(communityId) &&
conf.getRemoveConstraintsMap().get(communityId).getCriteria() != null &&
conf conf
.getRemoveConstraintsMap() .getRemoveConstraintsMap()
.get(communityId) .get(communityId)
@ -124,10 +128,10 @@ public class ResultTagger implements Serializable {
if (Objects.nonNull(result.getInstance())) { if (Objects.nonNull(result.getInstance())) {
for (Instance i : result.getInstance()) { for (Instance i : result.getInstance()) {
if (Objects.nonNull(i.getCollectedfrom()) && Objects.nonNull(i.getCollectedfrom().getKey())) { if (Objects.nonNull(i.getCollectedfrom()) && Objects.nonNull(i.getCollectedfrom().getKey())) {
collfrom.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|")); collfrom.add(i.getCollectedfrom().getKey());
} }
if (Objects.nonNull(i.getHostedby()) && Objects.nonNull(i.getHostedby().getKey())) { if (Objects.nonNull(i.getHostedby()) && Objects.nonNull(i.getHostedby().getKey())) {
hostdby.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|")); hostdby.add(i.getHostedby().getKey());
} }
} }

View File

@ -7,11 +7,13 @@ import java.util.Collection;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.google.gson.Gson; import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken; import com.google.gson.reflect.TypeToken;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
@JsonAutoDetect
public class SelectionConstraints implements Serializable { public class SelectionConstraints implements Serializable {
private List<Constraints> criteria; private List<Constraints> criteria;

View File

@ -9,9 +9,7 @@ import java.util.*;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
@ -20,6 +18,8 @@ import org.slf4j.LoggerFactory;
import com.google.gson.Gson; import com.google.gson.Gson;
import eu.dnetlib.dhp.api.Utils;
import eu.dnetlib.dhp.api.model.CommunityEntityMap;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Relation;
@ -48,10 +48,10 @@ public class PrepareResultCommunitySet {
final String outputPath = parser.get("outputPath"); final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath); log.info("outputPath: {}", outputPath);
final OrganizationMap organizationMap = new Gson() final String baseURL = parser.get("baseURL");
.fromJson( log.info("baseURL: {}", baseURL);
parser.get("organizationtoresultcommunitymap"),
OrganizationMap.class); final CommunityEntityMap organizationMap = Utils.getCommunityOrganization(baseURL);
log.info("organizationMap: {}", new Gson().toJson(organizationMap)); log.info("organizationMap: {}", new Gson().toJson(organizationMap));
SparkConf conf = new SparkConf(); SparkConf conf = new SparkConf();
@ -70,7 +70,7 @@ public class PrepareResultCommunitySet {
SparkSession spark, SparkSession spark,
String inputPath, String inputPath,
String outputPath, String outputPath,
OrganizationMap organizationMap) { CommunityEntityMap organizationMap) {
Dataset<Relation> relation = readPath(spark, inputPath, Relation.class); Dataset<Relation> relation = readPath(spark, inputPath, Relation.class);
relation.createOrReplaceTempView("relation"); relation.createOrReplaceTempView("relation");
@ -115,7 +115,7 @@ public class PrepareResultCommunitySet {
} }
private static MapFunction<ResultOrganizations, ResultCommunityList> mapResultCommunityFn( private static MapFunction<ResultOrganizations, ResultCommunityList> mapResultCommunityFn(
OrganizationMap organizationMap) { CommunityEntityMap organizationMap) {
return value -> { return value -> {
String rId = value.getResultId(); String rId = value.getResultId();
Optional<List<String>> orgs = Optional.ofNullable(value.getMerges()); Optional<List<String>> orgs = Optional.ofNullable(value.getMerges());

View File

@ -2,7 +2,7 @@
package eu.dnetlib.dhp.resulttocommunityfromorganization; package eu.dnetlib.dhp.resulttocommunityfromorganization;
import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.PropagationConstant.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
@ -22,6 +22,7 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Context; import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
import scala.Tuple2; import scala.Tuple2;
@ -53,29 +54,14 @@ public class SparkResultToCommunityFromOrganizationJob {
final String possibleupdatespath = parser.get("preparedInfoPath"); final String possibleupdatespath = parser.get("preparedInfoPath");
log.info("preparedInfoPath: {}", possibleupdatespath); log.info("preparedInfoPath: {}", possibleupdatespath);
final String resultClassName = parser.get("resultTableName");
log.info("resultTableName: {}", resultClassName);
final Boolean saveGraph = Optional
.ofNullable(parser.get("saveGraph"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("saveGraph: {}", saveGraph);
@SuppressWarnings("unchecked")
Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
SparkConf conf = new SparkConf(); SparkConf conf = new SparkConf();
conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
runWithSparkHiveSession( runWithSparkSession(
conf, conf,
isSparkSessionManaged, isSparkSessionManaged,
spark -> { spark -> {
removeOutputDir(spark, outputPath); execPropagation(spark, inputPath, outputPath, possibleupdatespath);
if (saveGraph) {
execPropagation(spark, inputPath, outputPath, resultClazz, possibleupdatespath);
}
}); });
} }
@ -83,11 +69,18 @@ public class SparkResultToCommunityFromOrganizationJob {
SparkSession spark, SparkSession spark,
String inputPath, String inputPath,
String outputPath, String outputPath,
Class<R> resultClazz,
String possibleUpdatesPath) { String possibleUpdatesPath) {
Dataset<ResultCommunityList> possibleUpdates = readPath(spark, possibleUpdatesPath, ResultCommunityList.class); Dataset<ResultCommunityList> possibleUpdates = readPath(spark, possibleUpdatesPath, ResultCommunityList.class);
Dataset<R> result = readPath(spark, inputPath, resultClazz);
ModelSupport.entityTypes
.keySet()
.parallelStream()
.forEach(e -> {
if (ModelSupport.isResult(e)) {
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
removeOutputDir(spark, outputPath + e.name());
Dataset<R> result = readPath(spark, inputPath + e.name(), resultClazz);
result result
.joinWith( .joinWith(
@ -98,7 +91,10 @@ public class SparkResultToCommunityFromOrganizationJob {
.write() .write()
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.option("compression", "gzip") .option("compression", "gzip")
.json(outputPath); .json(outputPath + e.name());
}
});
} }
private static <R extends Result> MapFunction<Tuple2<R, ResultCommunityList>, R> resultCommunityFn() { private static <R extends Result> MapFunction<Tuple2<R, ResultCommunityList>, R> resultCommunityFn() {

View File

@ -0,0 +1,123 @@
package eu.dnetlib.dhp.resulttocommunityfromproject;
import static eu.dnetlib.dhp.PropagationConstant.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.*;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.*;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.gson.Gson;
import eu.dnetlib.dhp.api.Utils;
import eu.dnetlib.dhp.api.model.CommunityEntityMap;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList;
import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultOrganizations;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Relation;
import scala.Tuple2;
public class PrepareResultCommunitySet {
private static final Logger log = LoggerFactory.getLogger(PrepareResultCommunitySet.class);
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
PrepareResultCommunitySet.class
.getResourceAsStream(
"/eu/dnetlib/dhp/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final String baseURL = parser.get("baseURL");
log.info("baseUEL: {}", baseURL);
final CommunityEntityMap projectsMap = Utils.getCommunityProjects(baseURL);
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
removeOutputDir(spark, outputPath);
prepareInfo(spark, inputPath, outputPath, projectsMap);
});
}
private static void prepareInfo(
SparkSession spark,
String inputPath,
String outputPath,
CommunityEntityMap projectMap) {
final StructType structureSchema = new StructType()
.add(
"dataInfo", new StructType()
.add("deletedbyinference", DataTypes.BooleanType)
.add("invisible", DataTypes.BooleanType))
.add("source", DataTypes.StringType)
.add("target", DataTypes.StringType)
.add("relClass", DataTypes.StringType);
spark
.read()
.schema(structureSchema)
.json(inputPath)
.filter(
"dataInfo.deletedbyinference != true " +
"and relClass == '" + ModelConstants.IS_PRODUCED_BY + "'")
.select(
new Column("source").as("resultId"),
new Column("target").as("projectId"))
.groupByKey((MapFunction<Row, String>) r -> (String) r.getAs("resultId"), Encoders.STRING())
.mapGroups((MapGroupsFunction<String, Row, ResultProjectList>) (k, v) -> {
ResultProjectList rpl = new ResultProjectList();
rpl.setResultId(k);
ArrayList<String> cl = new ArrayList<>();
cl.addAll(projectMap.get(v.next().getAs("projectId")));
v.forEachRemaining(r -> {
projectMap
.get(r.getAs("projectId"))
.forEach(c -> {
if (!cl.contains(c))
cl.add(c);
});
});
if (cl.size() == 0)
return null;
rpl.setCommunityList(cl);
return rpl;
}, Encoders.bean(ResultProjectList.class))
.filter(Objects::nonNull)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath);
}
}

View File

@ -0,0 +1,26 @@
package eu.dnetlib.dhp.resulttocommunityfromproject;
import java.io.Serializable;
import java.util.ArrayList;
public class ResultProjectList implements Serializable {
private String resultId;
private ArrayList<String> communityList;
public String getResultId() {
return resultId;
}
public void setResultId(String resultId) {
this.resultId = resultId;
}
public ArrayList<String> getCommunityList() {
return communityList;
}
public void setCommunityList(ArrayList<String> communityList) {
this.communityList = communityList;
}
}

View File

@ -0,0 +1,163 @@
package eu.dnetlib.dhp.resulttocommunityfromproject;
import static eu.dnetlib.dhp.PropagationConstant.*;
import static eu.dnetlib.dhp.PropagationConstant.PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList;
import eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.Result;
import scala.Tuple2;
/**
* @author miriam.baglioni
* @Date 11/10/23
*/
public class SparkResultToCommunityFromProject implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SparkResultToCommunityFromProject.class);
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
SparkResultToCommunityFromProject.class
.getResourceAsStream(
"/eu/dnetlib/dhp/resulttocommunityfromproject/input_communitytoresult_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final String possibleupdatespath = parser.get("preparedInfoPath");
log.info("preparedInfoPath: {}", possibleupdatespath);
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
execPropagation(spark, inputPath, outputPath, possibleupdatespath);
});
}
private static <R extends Result> void execPropagation(
SparkSession spark,
String inputPath,
String outputPath,
String possibleUpdatesPath) {
Dataset<ResultProjectList> possibleUpdates = readPath(spark, possibleUpdatesPath, ResultProjectList.class);
ModelSupport.entityTypes
.keySet()
.parallelStream()
.forEach(e -> {
if (ModelSupport.isResult(e)) {
removeOutputDir(spark, outputPath + e.name());
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
Dataset<R> result = readPath(spark, inputPath + e.name(), resultClazz);
result
.joinWith(
possibleUpdates,
result.col("id").equalTo(possibleUpdates.col("resultId")),
"left_outer")
.map(resultCommunityFn(), Encoders.bean(resultClazz))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath + e.name());
}
});
}
private static <R extends Result> MapFunction<Tuple2<R, ResultProjectList>, R> resultCommunityFn() {
return value -> {
R ret = value._1();
Optional<ResultProjectList> rcl = Optional.ofNullable(value._2());
if (rcl.isPresent()) {
// ArrayList<String> communitySet = rcl.get().getCommunityList();
List<String> contextList = ret
.getContext()
.stream()
.map(Context::getId)
.collect(Collectors.toList());
@SuppressWarnings("unchecked")
R res = (R) ret.getClass().newInstance();
res.setId(ret.getId());
List<Context> propagatedContexts = new ArrayList<>();
for (String cId : rcl.get().getCommunityList()) {
if (!contextList.contains(cId)) {
Context newContext = new Context();
newContext.setId(cId);
newContext
.setDataInfo(
Arrays
.asList(
getDataInfo(
PROPAGATION_DATA_INFO_TYPE,
PROPAGATION_RESULT_COMMUNITY_PROJECT_CLASS_ID,
PROPAGATION_RESULT_COMMUNITY_PROJECT_CLASS_NAME,
ModelConstants.DNET_PROVENANCE_ACTIONS)));
propagatedContexts.add(newContext);
} else {
ret
.getContext()
.stream()
.filter(c -> c.getId().equals(cId))
.findFirst()
.get()
.getDataInfo()
.add(
getDataInfo(
PROPAGATION_DATA_INFO_TYPE,
PROPAGATION_RESULT_COMMUNITY_PROJECT_CLASS_ID,
PROPAGATION_RESULT_COMMUNITY_PROJECT_CLASS_NAME,
ModelConstants.DNET_PROVENANCE_ACTIONS));
}
}
res.setContext(propagatedContexts);
ret.mergeFrom(res);
}
return ret;
};
}
}

View File

@ -1,10 +1,5 @@
[ [
{
"paramName":"is",
"paramLongName":"isLookUpUrl",
"paramDescription": "URL of the isLookUp Service",
"paramRequired": true
},
{ {
"paramName":"s", "paramName":"s",
"paramLongName":"sourcePath", "paramLongName":"sourcePath",
@ -17,12 +12,7 @@
"paramDescription": "the json path associated to each selection field", "paramDescription": "the json path associated to each selection field",
"paramRequired": true "paramRequired": true
}, },
{
"paramName":"tn",
"paramLongName":"resultTableName",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{ {
"paramName": "out", "paramName": "out",
"paramLongName": "outputPath", "paramLongName": "outputPath",
@ -35,17 +25,19 @@
"paramDescription": "true if the spark session is managed, false otherwise", "paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false "paramRequired": false
}, },
{
"paramName": "test",
"paramLongName": "isTest",
"paramDescription": "Parameter intended for testing purposes only. True if the reun is relatesd to a test and so the taggingConf parameter should be loaded",
"paramRequired": false
},
{ {
"paramName": "tg", "paramName": "tg",
"paramLongName": "taggingConf", "paramLongName": "taggingConf",
"paramDescription": "this parameter is intended for testing purposes only. It is a possible tagging configuration obtained via the XQUERY. Intended to be removed", "paramDescription": "this parameter is intended for testing purposes only. It is a possible tagging configuration obtained via the XQUERY. Intended to be removed",
"paramRequired": false "paramRequired": false
},
{
"paramName": "bu",
"paramLongName": "baseURL",
"paramDescription": "this parameter is to specify the api to be queried (beta or production)",
"paramRequired": false
} }
] ]

View File

@ -4,10 +4,6 @@
<name>sourcePath</name> <name>sourcePath</name>
<description>the source path</description> <description>the source path</description>
</property> </property>
<property>
<name>isLookUpUrl</name>
<description>the isLookup service endpoint</description>
</property>
<property> <property>
<name>pathMap</name> <name>pathMap</name>
<description>the json path associated to each selection field</description> <description>the json path associated to each selection field</description>
@ -44,7 +40,7 @@
</configuration> </configuration>
</global> </global>
<start to="reset_outputpath"/> <start to="exec_bulktag"/>
<kill name="Kill"> <kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message> <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
@ -102,16 +98,9 @@
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<join name="copy_wait" to="fork_exec_bulktag"/> <join name="copy_wait" to="exec_bulktag"/>
<fork name="fork_exec_bulktag"> <action name="exec_bulktag">
<path start="bulktag_publication"/>
<path start="bulktag_dataset"/>
<path start="bulktag_otherresearchproduct"/>
<path start="bulktag_software"/>
</fork>
<action name="bulktag_publication">
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master> <master>yarn-cluster</master>
<mode>cluster</mode> <mode>cluster</mode>
@ -128,98 +117,15 @@
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg> <arg>--sourcePath</arg><arg>${sourcePath}/</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg> <arg>--outputPath</arg><arg>${outputPath}/</arg>
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
<arg>--pathMap</arg><arg>${pathMap}</arg> <arg>--pathMap</arg><arg>${pathMap}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg> <arg>--production</arg><arg>${production}</arg>
</spark> </spark>
<ok to="wait"/> <ok to="End"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<action name="bulktag_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>bulkTagging-dataset</name>
<class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--num-executors=${sparkExecutorNumber}
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
<arg>--pathMap</arg><arg>${pathMap}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</spark>
<ok to="wait"/>
<error to="Kill"/>
</action>
<action name="bulktag_otherresearchproduct">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>bulkTagging-orp</name>
<class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--num-executors=${sparkExecutorNumber}
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
<arg>--pathMap</arg><arg>${pathMap}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</spark>
<ok to="wait"/>
<error to="Kill"/>
</action>
<action name="bulktag_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>bulkTagging-software</name>
<class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--num-executors=${sparkExecutorNumber}
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
<arg>--pathMap</arg><arg>${pathMap}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</spark>
<ok to="wait"/>
<error to="Kill"/>
</action>
<join name="wait" to="End"/>
<end name="End"/> <end name="End"/>

View File

@ -1,62 +0,0 @@
for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType')
let $subj := $x//CONFIGURATION/context/param[./@name='subject']/text()
let $datasources := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::contentproviders')]/concept
let $organizations := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::resultorganizations')]/concept
let $communities := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::zenodocommunities')]/concept
let $fos := $x//CONFIGURATION/context/param[./@name='fos']/text()
let $sdg := $x//CONFIGURATION/context/param[./@name='sdg']/text()
let $zenodo := $x//param[./@name='zenodoCommunity']/text()
where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] and $x//context/param[./@name = 'status']/text() != 'hidden'
return
<community>
{ $x//CONFIGURATION/context/@id}
<removeConstraints>
{$x//CONFIGURATION/context/param[./@name='removeConstraints']/text() }
</removeConstraints>
<advancedConstraints>
{$x//CONFIGURATION/context/param[./@name='advancedConstraints']/text() }
</advancedConstraints>
<subjects>
{for $y in tokenize($subj,',')
return
<subject>{$y}</subject>}
{for $y in tokenize($fos,',')
return
<subject>{$y}</subject>}
{for $y in tokenize($sdg,',')
return
<subject>{$y}</subject>}
</subjects>
<datasources>
{for $d in $datasources
where $d/param[./@name='enabled']/text()='true'
return
<datasource>
<openaireId>
{$d//param[./@name='openaireId']/text()}
</openaireId>
<selcriteria>
{$d/param[./@name='selcriteria']/text()}
</selcriteria>
</datasource> }
</datasources>
<zenodocommunities>
{for $zc in $zenodo
return
<zenodocommunity>
<zenodoid>
{$zc}
</zenodoid>
</zenodocommunity>}
{for $zc in $communities
return
<zenodocommunity>
<zenodoid>
{$zc/param[./@name='zenodoid']/text()}
</zenodoid>
<selcriteria>
{$zc/param[./@name='selcriteria']/text()}
</selcriteria>
</zenodocommunity>}
</zenodocommunities>
</community>

View File

@ -5,24 +5,7 @@
"paramDescription": "the path of the sequencial file to read", "paramDescription": "the path of the sequencial file to read",
"paramRequired": true "paramRequired": true
}, },
{
"paramName":"h",
"paramLongName":"hive_metastore_uris",
"paramDescription": "the hive metastore uris",
"paramRequired": true
},
{
"paramName":"sg",
"paramLongName":"saveGraph",
"paramDescription": "true if the new version of the graph must be saved",
"paramRequired": false
},
{
"paramName":"test",
"paramLongName":"isTest",
"paramDescription": "true if it is executing a test",
"paramRequired": false
},
{ {
"paramName": "out", "paramName": "out",
"paramLongName": "outputPath", "paramLongName": "outputPath",
@ -35,12 +18,6 @@
"paramDescription": "true if the spark session is managed, false otherwise", "paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false "paramRequired": false
}, },
{
"paramName":"tn",
"paramLongName":"resultTableName",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{ {
"paramName": "p", "paramName": "p",
"paramLongName": "preparedInfoPath", "paramLongName": "preparedInfoPath",

View File

@ -5,12 +5,6 @@
"paramDescription": "the path of the sequencial file to read", "paramDescription": "the path of the sequencial file to read",
"paramRequired": true "paramRequired": true
}, },
{
"paramName":"ocm",
"paramLongName":"organizationtoresultcommunitymap",
"paramDescription": "the map for the association organization communities",
"paramRequired": true
},
{ {
"paramName":"h", "paramName":"h",
"paramLongName":"hive_metastore_uris", "paramLongName":"hive_metastore_uris",
@ -28,6 +22,12 @@
"paramLongName": "outputPath", "paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files", "paramDescription": "the path used to store temporary output files",
"paramRequired": true "paramRequired": true
},
{
"paramName": "bu",
"paramLongName": "baseURL",
"paramDescription": "the base URL to the community API to use",
"paramRequired": false
} }
] ]

View File

@ -4,10 +4,7 @@
<name>sourcePath</name> <name>sourcePath</name>
<description>the source path</description> <description>the source path</description>
</property> </property>
<property>
<name>organizationtoresultcommunitymap</name>
<description>organization community map</description>
</property>
<property> <property>
<name>outputPath</name> <name>outputPath</name>
<description>the output path</description> <description>the output path</description>
@ -25,7 +22,7 @@
</configuration> </configuration>
</global> </global>
<start to="reset_outputpath"/> <start to="prepare_result_communitylist"/>
<kill name="Kill"> <kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message> <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
@ -93,33 +90,28 @@
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.PrepareResultCommunitySet</class> <class>eu.dnetlib.dhp.resulttocommunityfromorganization.PrepareResultCommunitySet</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar> <jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCores} --executor-cores=6
--executor-memory=${sparkExecutorMemory} --executor-memory=5G
--conf spark.executor.memoryOverhead=3g
--conf spark.sql.shuffle.partitions=3284
--driver-memory=${sparkDriverMemory} --driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners} --conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg> <arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg> <arg>--outputPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg> <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--organizationtoresultcommunitymap</arg><arg>${organizationtoresultcommunitymap}</arg> <arg>--production</arg><arg>${production}</arg>
</spark> </spark>
<ok to="fork-join-exec-propagation"/> <ok to="exec-propagation"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<fork name="fork-join-exec-propagation"> <action name="exec-propagation">
<path start="join_propagate_publication"/>
<path start="join_propagate_dataset"/>
<path start="join_propagate_otherresearchproduct"/>
<path start="join_propagate_software"/>
</fork>
<action name="join_propagate_publication">
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
@ -127,115 +119,26 @@
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class> <class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar> <jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCores} --executor-cores=6
--executor-memory=${sparkExecutorMemory} --executor-memory=5G
--conf spark.executor.memoryOverhead=3g
--conf spark.sql.shuffle.partitions=3284
--driver-memory=${sparkDriverMemory} --driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners} --conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts> </spark-opts>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg> <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg> <arg>--sourcePath</arg><arg>${sourcePath}/</arg>
<arg>--outputPath</arg><arg>${outputPath}/publication</arg> <arg>--outputPath</arg><arg>${outputPath}/</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
</spark> </spark>
<ok to="wait2"/> <ok to="End"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<action name="join_propagate_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>community2resultfromorganization-Dataset</name>
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
</spark>
<ok to="wait2"/>
<error to="Kill"/>
</action>
<action name="join_propagate_otherresearchproduct">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>community2resultfromorganization-ORP</name>
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
</spark>
<ok to="wait2"/>
<error to="Kill"/>
</action>
<action name="join_propagate_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>community2resultfromorganization-Software</name>
<class>eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
</spark>
<ok to="wait2"/>
<error to="Kill"/>
</action>
<join name="wait2" to="End"/>
<end name="End"/> <end name="End"/>

View File

@ -0,0 +1,28 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName": "p",
"paramLongName": "preparedInfoPath",
"paramDescription": "the path where prepared info have been stored",
"paramRequired": true
}
]

View File

@ -0,0 +1,28 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "bu",
"paramLongName": "baseURL",
"paramDescription": "the path used to store temporary output files",
"paramRequired": false
}
]

View File

@ -0,0 +1,58 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
<property>
<name>hive_metastore_uris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
</property>
<property>
<name>spark2EventLogDir</name>
<value>/user/spark/spark2ApplicationHistory</value>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
</property>
<property>
<name>sparkExecutorNumber</name>
<value>4</value>
</property>
<property>
<name>sparkDriverMemory</name>
<value>15G</value>
</property>
<property>
<name>sparkExecutorMemory</name>
<value>6G</value>
</property>
<property>
<name>sparkExecutorCores</name>
<value>1</value>
</property>
<property>
<name>spark2MaxExecutors</name>
<value>50</value>
</property>
</configuration>

View File

@ -0,0 +1,144 @@
<workflow-app name="community_to_result_propagation_project" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
<description>the source path</description>
</property>
<property>
<name>outputPath</name>
<description>the output path</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="reset_outputpath"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="reset_outputpath">
<fs>
<delete path="${outputPath}"/>
<mkdir path="${outputPath}"/>
</fs>
<ok to="copy_entities"/>
<error to="Kill"/>
</action>
<fork name="copy_entities">
<path start="copy_relation"/>
<path start="copy_organization"/>
<path start="copy_projects"/>
<path start="copy_datasources"/>
</fork>
<action name="copy_relation">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<arg>${nameNode}/${sourcePath}/relation</arg>
<arg>${nameNode}/${outputPath}/relation</arg>
</distcp>
<ok to="copy_wait"/>
<error to="Kill"/>
</action>
<action name="copy_organization">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<arg>${nameNode}/${sourcePath}/organization</arg>
<arg>${nameNode}/${outputPath}/organization</arg>
</distcp>
<ok to="copy_wait"/>
<error to="Kill"/>
</action>
<action name="copy_projects">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<arg>${nameNode}/${sourcePath}/project</arg>
<arg>${nameNode}/${outputPath}/project</arg>
</distcp>
<ok to="copy_wait"/>
<error to="Kill"/>
</action>
<action name="copy_datasources">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<arg>${nameNode}/${sourcePath}/datasource</arg>
<arg>${nameNode}/${outputPath}/datasource</arg>
</distcp>
<ok to="copy_wait"/>
<error to="Kill"/>
</action>
<join name="copy_wait" to="prepare_result_communitylist"/>
<action name="prepare_result_communitylist">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Prepare-Community-Result-Organization</name>
<class>eu.dnetlib.dhp.resulttocommunityfromproject.PrepareResultCommunitySet</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=6
--executor-memory=5G
--conf spark.executor.memoryOverhead=3g
--conf spark.sql.shuffle.partitions=3284
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
<arg>--production</arg><arg>${production}</arg>
</spark>
<ok to="exec-propagation"/>
<error to="Kill"/>
</action>
<action name="exec-propagation">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>community2resultfromproject</name>
<class>eu.dnetlib.dhp.resulttocommunityfromproject.SparkResultToCommunityFromProject</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
--executor-cores=6
--executor-memory=5G
--conf spark.executor.memoryOverhead=3g
--conf spark.sql.shuffle.partitions=3284
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
</spark-opts>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/</arg>
<arg>--outputPath</arg><arg>${outputPath}/</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -31,8 +31,6 @@ public class BulkTagJobTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static final String MOCK_IS_LOOK_UP_URL = "BASEURL:8280/is/services/isLookUp";
public static final String pathMap = "{ \"author\" : \"$['author'][*]['fullname']\"," public static final String pathMap = "{ \"author\" : \"$['author'][*]['fullname']\","
+ " \"title\" : \"$['title'][*]['value']\"," + " \"title\" : \"$['title'][*]['value']\","
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\"," + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
@ -42,7 +40,9 @@ public class BulkTagJobTest {
"\"fos\" : \"$['subject'][?(@['qualifier']['classid']=='FOS')].value\"," + "\"fos\" : \"$['subject'][?(@['qualifier']['classid']=='FOS')].value\"," +
"\"sdg\" : \"$['subject'][?(@['qualifier']['classid']=='SDG')].value\"," + "\"sdg\" : \"$['subject'][?(@['qualifier']['classid']=='SDG')].value\"," +
"\"hostedby\" : \"$['instance'][*]['hostedby']['key']\" , " + "\"hostedby\" : \"$['instance'][*]['hostedby']['key']\" , " +
"\"collectedfrom\" : \"$['instance'][*]['collectedfrom']['key']\"} "; "\"collectedfrom\" : \"$['instance'][*]['collectedfrom']['key']\"," +
"\"publisher\":\"$['publisher'].value\"," +
"\"publicationyear\":\"$['dateofacceptance'].value\"} ";
private static SparkSession spark; private static SparkSession spark;
@ -98,14 +98,11 @@ public class BulkTagJobTest {
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", "-sourcePath",
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates").getPath(), getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates/").getPath(),
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", "-outputPath", workingDir.toString() + "/",
"-outputPath", workingDir.toString() + "/dataset",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -133,19 +130,16 @@ public class BulkTagJobTest {
@Test @Test
void bulktagBySubjectNoPreviousContextTest() throws Exception { void bulktagBySubjectNoPreviousContextTest() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext") .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext/")
.getPath(); .getPath();
final String pathMap = BulkTagJobTest.pathMap; final String pathMap = BulkTagJobTest.pathMap;
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", "-outputPath", workingDir.toString() + "/",
"-outputPath", workingDir.toString() + "/dataset",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -230,19 +224,19 @@ public class BulkTagJobTest {
void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception { void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance") "/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance/")
.getPath(); .getPath();
final String pathMap = BulkTagJobTest.pathMap; final String pathMap = BulkTagJobTest.pathMap;
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -311,18 +305,18 @@ public class BulkTagJobTest {
@Test @Test
void bulktagByDatasourceTest() throws Exception { void bulktagByDatasourceTest() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource") .getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString() + "/publication", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -384,25 +378,25 @@ public class BulkTagJobTest {
void bulktagByZenodoCommunityTest() throws Exception { void bulktagByZenodoCommunityTest() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity") "/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
"-outputPath", workingDir.toString() + "/orp", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<OtherResearchProduct> tmp = sc JavaRDD<OtherResearchProduct> tmp = sc
.textFile(workingDir.toString() + "/orp") .textFile(workingDir.toString() + "/otherresearchproduct")
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class)); .map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class));
Assertions.assertEquals(10, tmp.count()); Assertions.assertEquals(10, tmp.count());
@ -505,18 +499,18 @@ public class BulkTagJobTest {
@Test @Test
void bulktagBySubjectDatasourceTest() throws Exception { void bulktagBySubjectDatasourceTest() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource") .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -539,6 +533,7 @@ public class BulkTagJobTest {
+ "where MyD.inferenceprovenance = 'bulktagging'"; + "where MyD.inferenceprovenance = 'bulktagging'";
org.apache.spark.sql.Dataset<Row> idExplodeCommunity = spark.sql(query); org.apache.spark.sql.Dataset<Row> idExplodeCommunity = spark.sql(query);
Assertions.assertEquals(7, idExplodeCommunity.count()); Assertions.assertEquals(7, idExplodeCommunity.count());
Assertions Assertions
@ -636,14 +631,14 @@ public class BulkTagJobTest {
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", "-sourcePath",
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/software/software_10.json.gz").getPath(), getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/software/").getPath(),
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-outputPath", workingDir.toString() + "/software", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -732,18 +727,18 @@ public class BulkTagJobTest {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints") "/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -774,19 +769,19 @@ public class BulkTagJobTest {
void bulkTagOtherJupyter() throws Exception { void bulkTagOtherJupyter() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/eosctag/jupyter/otherresearchproduct") "/eu/dnetlib/dhp/eosctag/jupyter/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
"-outputPath", workingDir.toString() + "/otherresearchproduct", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -829,18 +824,18 @@ public class BulkTagJobTest {
public void bulkTagDatasetJupyter() throws Exception { public void bulkTagDatasetJupyter() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/eosctag/jupyter/dataset") "/eu/dnetlib/dhp/eosctag/jupyter/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -878,18 +873,18 @@ public class BulkTagJobTest {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/eosctag/jupyter/software") "/eu/dnetlib/dhp/eosctag/jupyter/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-outputPath", workingDir.toString() + "/software", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -1096,18 +1091,18 @@ public class BulkTagJobTest {
void galaxyOtherTest() throws Exception { void galaxyOtherTest() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/eosctag/galaxy/otherresearchproduct") "/eu/dnetlib/dhp/eosctag/galaxy/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
"-outputPath", workingDir.toString() + "/otherresearchproduct", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -1214,18 +1209,18 @@ public class BulkTagJobTest {
void galaxySoftwareTest() throws Exception { void galaxySoftwareTest() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/eosctag/galaxy/software") "/eu/dnetlib/dhp/eosctag/galaxy/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-outputPath", workingDir.toString() + "/software", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -1333,19 +1328,19 @@ public class BulkTagJobTest {
void twitterDatasetTest() throws Exception { void twitterDatasetTest() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/eosctag/twitter/dataset") "/eu/dnetlib/dhp/eosctag/twitter/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -1373,19 +1368,19 @@ public class BulkTagJobTest {
void twitterOtherTest() throws Exception { void twitterOtherTest() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/eosctag/twitter/otherresearchproduct") "/eu/dnetlib/dhp/eosctag/twitter/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
"-outputPath", workingDir.toString() + "/otherresearchproduct", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -1418,19 +1413,19 @@ public class BulkTagJobTest {
void twitterSoftwareTest() throws Exception { void twitterSoftwareTest() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/eosctag/twitter/software") "/eu/dnetlib/dhp/eosctag/twitter/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-outputPath", workingDir.toString() + "/software", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -1455,19 +1450,19 @@ public class BulkTagJobTest {
void EoscContextTagTest() throws Exception { void EoscContextTagTest() throws Exception {
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource( .getResource(
"/eu/dnetlib/dhp/bulktag/eosc/dataset/dataset_10.json") "/eu/dnetlib/dhp/bulktag/eosc/dataset/")
.getPath(); .getPath();
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
@ -1533,16 +1528,16 @@ public class BulkTagJobTest {
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", "-sourcePath",
getClass() getClass()
.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints") .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints/")
.getPath(), .getPath(),
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset", "-outputPath", workingDir.toString() + "/",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1568,4 +1563,41 @@ public class BulkTagJobTest {
} }
@Test
void newConfTest() throws Exception {
final String pathMap = BulkTagJobTest.pathMap;
SparkBulkTagJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath",
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates/").getPath(),
"-outputPath", workingDir.toString() + "/",
// "-baseURL", "https://services.openaire.eu/openaire/community/",
"-pathMap", pathMap,
"-taggingConf", taggingConf
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Dataset> tmp = sc
.textFile(workingDir.toString() + "/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
Assertions.assertEquals(10, tmp.count());
org.apache.spark.sql.Dataset<Dataset> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(Dataset.class));
verificationDataset.createOrReplaceTempView("dataset");
String query = "select id, MyT.id community "
+ "from dataset "
+ "lateral view explode(context) c as MyT "
+ "lateral view explode(MyT.datainfo) d as MyD "
+ "where MyD.inferenceprovenance = 'bulktagging'";
Assertions.assertEquals(0, spark.sql(query).count());
}
} }

View File

@ -47,7 +47,7 @@ class CommunityConfigurationFactoryTest {
sc.setVerb("not_contains"); sc.setVerb("not_contains");
sc.setField("contributor"); sc.setField("contributor");
sc.setValue("DARIAH"); sc.setValue("DARIAH");
sc.setSelection(resolver.getSelectionCriteria(sc.getVerb(), sc.getValue())); sc.setSelection(resolver);// .getSelectionCriteria(sc.getVerb(), sc.getValue()));
String metadata = "This work has been partially supported by DARIAH-EU infrastructure"; String metadata = "This work has been partially supported by DARIAH-EU infrastructure";
Assertions.assertFalse(sc.verifyCriteria(metadata)); Assertions.assertFalse(sc.verifyCriteria(metadata));
} }

View File

@ -72,15 +72,13 @@ public class ResultToCommunityJobTest {
SparkResultToCommunityFromOrganizationJob SparkResultToCommunityFromOrganizationJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", getClass() "-sourcePath", getClass()
.getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/sample") .getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/sample/")
.getPath(), .getPath(),
"-hive_metastore_uris", "",
"-saveGraph", "true", "-outputPath", workingDir.toString() + "/",
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset",
"-preparedInfoPath", preparedInfoPath "-preparedInfoPath", preparedInfoPath
}); });

View File

@ -0,0 +1,133 @@
package eu.dnetlib.dhp.resulttocommunityfromproject;
import static org.apache.spark.sql.functions.desc;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.orcidtoresultfromsemrel.OrcidPropagationJobTest;
import eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob;
import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.Dataset;
public class ResultToCommunityJobTest {
private static final Logger log = LoggerFactory.getLogger(ResultToCommunityJobTest.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static SparkSession spark;
private static Path workingDir;
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files.createTempDirectory(ResultToCommunityJobTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(ResultToCommunityJobTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(OrcidPropagationJobTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
void testSparkResultToCommunityFromProjectJob() throws Exception {
final String preparedInfoPath = getClass()
.getResource("/eu/dnetlib/dhp/resulttocommunityfromproject/preparedInfo")
.getPath();
SparkResultToCommunityFromProject
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", getClass()
.getResource("/eu/dnetlib/dhp/resulttocommunityfromproject/sample/")
.getPath(),
"-outputPath", workingDir.toString() + "/",
"-preparedInfoPath", preparedInfoPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Dataset> tmp = sc
.textFile(workingDir.toString() + "/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
Assertions.assertEquals(10, tmp.count());
/**
* {"resultId":"50|57a035e5b1ae::d5be548ca7ae489d762f893be67af52f","communityList":["aurora"]}
* {"resultId":"50|57a035e5b1ae::a77232ffca9115fcad51c3503dbc7e3e","communityList":["aurora"]}
* {"resultId":"50|57a035e5b1ae::803aaad4decab7e27cd4b52a1931b3a1","communityList":["sdsn-gr"]}
* {"resultId":"50|57a035e5b1ae::a02e9e4087bca50687731ae5c765b5e1","communityList":["netherlands"]}
*/
List<Context> context = tmp
.filter(r -> r.getId().equals("50|57a035e5b1ae::d5be548ca7ae489d762f893be67af52f"))
.first()
.getContext();
Assertions.assertTrue(context.stream().anyMatch(c -> containsResultCommunityProject(c)));
context = tmp
.filter(r -> r.getId().equals("50|57a035e5b1ae::a77232ffca9115fcad51c3503dbc7e3e"))
.first()
.getContext();
Assertions.assertTrue(context.stream().anyMatch(c -> containsResultCommunityProject(c)));
Assertions
.assertEquals(
0, tmp.filter(r -> r.getId().equals("50|57a035e5b1ae::803aaad4decab7e27cd4b52a1931b3a1")).count());
Assertions
.assertEquals(
0, tmp.filter(r -> r.getId().equals("50|57a035e5b1ae::a02e9e4087bca50687731ae5c765b5e1")).count());
Assertions
.assertEquals(
2, tmp.filter(r -> r.getContext().stream().anyMatch(c -> c.getId().equals("aurora"))).count());
}
private static boolean containsResultCommunityProject(Context c) {
return c
.getDataInfo()
.stream()
.anyMatch(di -> di.getProvenanceaction().getClassid().equals("result:community:project"));
}
}

View File

@ -26,7 +26,7 @@
<subjects/> <subjects/>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>re3data_____::a507cdacc5bbcc08761c92185dee5cab</openaireId> <openaireId>10|re3data_____::a507cdacc5bbcc08761c92185dee5cab</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
</datasources> </datasources>
@ -140,39 +140,39 @@
</subjects> </subjects>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>re3data_____::9ebe127e5f3a0bf401875690f3bb6b81</openaireId> <openaireId>10|re3data_____::9ebe127e5f3a0bf401875690f3bb6b81</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::c6cd4b532e12868c1d760a8d7cda6815</openaireId> <openaireId>10|doajarticles::c6cd4b532e12868c1d760a8d7cda6815</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b</openaireId> <openaireId>10|doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::6eb31d13b12bc06bbac06aef63cf33c9</openaireId> <openaireId>10|doajarticles::6eb31d13b12bc06bbac06aef63cf33c9</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::0da84e9dfdc8419576169e027baa8028</openaireId> <openaireId>10|doajarticles::0da84e9dfdc8419576169e027baa8028</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>re3data_____::84e123776089ce3c7a33db98d9cd15a8</openaireId> <openaireId>10|re3data_____::84e123776089ce3c7a33db98d9cd15a8</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>openaire____::c5502a43e76feab55dd00cf50f519125</openaireId> <openaireId>10|openaire____::c5502a43e76feab55dd00cf50f519125</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>re3data_____::a48f09c562b247a9919acfe195549b47</openaireId> <openaireId>10|re3data_____::a48f09c562b247a9919acfe195549b47</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::97275a23ca44226c9964043c8462be96</openaireId> <openaireId>10|opendoar____::97275a23ca44226c9964043c8462be96</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
</datasources> </datasources>
@ -287,55 +287,55 @@
</subjects> </subjects>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>doajarticles::8cec81178926caaca531afbd8eb5d64c</openaireId> <openaireId>10|doajarticles::8cec81178926caaca531afbd8eb5d64c</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::0f7a7f30b5400615cae1829f3e743982</openaireId> <openaireId>10|doajarticles::0f7a7f30b5400615cae1829f3e743982</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::9740f7f5af3e506d2ad2c215cdccd51a</openaireId> <openaireId>10|doajarticles::9740f7f5af3e506d2ad2c215cdccd51a</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::9f3fbaae044fa33cb7069b72935a3254</openaireId> <openaireId>10|doajarticles::9f3fbaae044fa33cb7069b72935a3254</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::cb67f33eb9819f5c624ce0313957f6b3</openaireId> <openaireId>10|doajarticles::cb67f33eb9819f5c624ce0313957f6b3</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::e21c97cbb7a209afc75703681c462906</openaireId> <openaireId>10|doajarticles::e21c97cbb7a209afc75703681c462906</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::554cde3be9e5c4588b4c4f9f503120cb</openaireId> <openaireId>10|doajarticles::554cde3be9e5c4588b4c4f9f503120cb</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>tubitakulakb::11e22f49e65b9fd11d5b144b93861a1b</openaireId> <openaireId>10|tubitakulakb::11e22f49e65b9fd11d5b144b93861a1b</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::57c5d3837da943e93b28ec4db82ec7a5</openaireId> <openaireId>10|doajarticles::57c5d3837da943e93b28ec4db82ec7a5</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::a186f5ddb8e8c7ecc992ef51cf3315b1</openaireId> <openaireId>10|doajarticles::a186f5ddb8e8c7ecc992ef51cf3315b1</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::e21c97cbb7a209afc75703681c462906</openaireId> <openaireId>10|doajarticles::e21c97cbb7a209afc75703681c462906</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::dca64612dfe0963fffc119098a319957</openaireId> <openaireId>10|doajarticles::dca64612dfe0963fffc119098a319957</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::dd70e44479f0ade25aa106aef3e87a0a</openaireId> <openaireId>10|doajarticles::dd70e44479f0ade25aa106aef3e87a0a</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
</datasources> </datasources>
@ -406,27 +406,27 @@
</subjects> </subjects>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>re3data_____::5b9bf9171d92df854cf3c520692e9122</openaireId> <openaireId>10|re3data_____::5b9bf9171d92df854cf3c520692e9122</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::c7d3de67dc77af72f6747157441252ec</openaireId> <openaireId>10|doajarticles::c7d3de67dc77af72f6747157441252ec</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>re3data_____::8515794670370f49c1d176c399c714f5</openaireId> <openaireId>10|re3data_____::8515794670370f49c1d176c399c714f5</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::d640648c84b10d425f96f11c3de468f3</openaireId> <openaireId>10|doajarticles::d640648c84b10d425f96f11c3de468f3</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a</openaireId> <openaireId>10|doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>rest________::fb1a3d4523c95e63496e3bc7ba36244b</openaireId> <openaireId>10|rest________::fb1a3d4523c95e63496e3bc7ba36244b</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
</datasources> </datasources>
@ -743,27 +743,27 @@
</subjects> </subjects>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>opendoar____::1a551829d50f1400b0dab21fdd969c04</openaireId> <openaireId>10|opendoar____::1a551829d50f1400b0dab21fdd969c04</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::49af6c4e558a7569d80eee2e035e2bd7</openaireId> <openaireId>10|opendoar____::49af6c4e558a7569d80eee2e035e2bd7</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::0266e33d3f546cb5436a10798e657d97</openaireId> <openaireId>10|opendoar____::0266e33d3f546cb5436a10798e657d97</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::fd4c2dc64ccb8496e6f1f94c85f30d06</openaireId> <openaireId>10|opendoar____::fd4c2dc64ccb8496e6f1f94c85f30d06</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::41bfd20a38bb1b0bec75acf0845530a7</openaireId> <openaireId>10|opendoar____::41bfd20a38bb1b0bec75acf0845530a7</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::87ae6fb631f7c8a627e8e28785d9992d</openaireId> <openaireId>10|opendoar____::87ae6fb631f7c8a627e8e28785d9992d</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
</datasources> </datasources>
@ -983,11 +983,11 @@
<subjects/> <subjects/>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>opendoar____::7e7757b1e12abcb736ab9a754ffb617a</openaireId> <openaireId>10|opendoar____::7e7757b1e12abcb736ab9a754ffb617a</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]}</selcriteria> <selcriteria>{"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]}</selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::96da2f590cd7246bbde0051047b0d6f7</openaireId> <openaireId>10|opendoar____::96da2f590cd7246bbde0051047b0d6f7</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]}</selcriteria> <selcriteria>{"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]}</selcriteria>
</datasource> </datasource>
</datasources> </datasources>
@ -1166,87 +1166,87 @@
</subjects> </subjects>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>doajarticles::1c5bdf8fca58937894ad1441cca99b76</openaireId> <openaireId>10|doajarticles::1c5bdf8fca58937894ad1441cca99b76</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::b37a634324a45c821687e6e80e6f53b4</openaireId> <openaireId>10|doajarticles::b37a634324a45c821687e6e80e6f53b4</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::4bf64f2a104040e4e055cd9594b2d77c</openaireId> <openaireId>10|doajarticles::4bf64f2a104040e4e055cd9594b2d77c</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::479ca537c12755d1868bbf02938a900c</openaireId> <openaireId>10|doajarticles::479ca537c12755d1868bbf02938a900c</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::55f31df96a60e2309f45b7c265fcf7a2</openaireId> <openaireId>10|doajarticles::55f31df96a60e2309f45b7c265fcf7a2</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::c52a09891a5301f9986ebbfe3761810c</openaireId> <openaireId>10|doajarticles::c52a09891a5301f9986ebbfe3761810c</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::379807bc7f6c71a227ef1651462c414c</openaireId> <openaireId>10|doajarticles::379807bc7f6c71a227ef1651462c414c</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::36069db531a00b85a2e8fb301f4bdc19</openaireId> <openaireId>10|doajarticles::36069db531a00b85a2e8fb301f4bdc19</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::b6a898da311ded96fabf49c520b80d5d</openaireId> <openaireId>10|doajarticles::b6a898da311ded96fabf49c520b80d5d</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::d0753d9180b35a271d8b4a31f449749f</openaireId> <openaireId>10|doajarticles::d0753d9180b35a271d8b4a31f449749f</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::172050a92511838393a3fe237ae47e31</openaireId> <openaireId>10|doajarticles::172050a92511838393a3fe237ae47e31</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::301ed96c62abb160a3e29796efe5c95c</openaireId> <openaireId>10|doajarticles::301ed96c62abb160a3e29796efe5c95c</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::0f4f805b3d842f2c7f1b077c3426fa59</openaireId> <openaireId>10|doajarticles::0f4f805b3d842f2c7f1b077c3426fa59</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::ba73728b84437b8d48ae287b867c7215</openaireId> <openaireId>10|doajarticles::ba73728b84437b8d48ae287b867c7215</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::86faef424d804309ccf45f692523aa48</openaireId> <openaireId>10|doajarticles::86faef424d804309ccf45f692523aa48</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::73bd758fa41671de70964c3ecba013af</openaireId> <openaireId>10|doajarticles::73bd758fa41671de70964c3ecba013af</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::e661fc0bdb24af42b740a08f0ddc6cf4</openaireId> <openaireId>10|doajarticles::e661fc0bdb24af42b740a08f0ddc6cf4</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::a6d3052047d5dbfbd43d95b4afb0f3d7</openaireId> <openaireId>10|doajarticles::a6d3052047d5dbfbd43d95b4afb0f3d7</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::ca61df07089acc53a1569bde6673d82a</openaireId> <openaireId>10|doajarticles::ca61df07089acc53a1569bde6673d82a</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::237dd6f1606600459d0297abd8ed9976</openaireId> <openaireId>10|doajarticles::237dd6f1606600459d0297abd8ed9976</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::fba6191177ede7c51ea1cdf58eae7f8b</openaireId> <openaireId>10|doajarticles::fba6191177ede7c51ea1cdf58eae7f8b</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
</datasources> </datasources>
@ -1345,87 +1345,87 @@
</subjects> </subjects>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>doajarticles::c6f0ed5fa41e98863e7c73501fe4bd6d</openaireId> <openaireId>10|doajarticles::c6f0ed5fa41e98863e7c73501fe4bd6d</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::ae4c7286c79590f19fdca670156ce816</openaireId> <openaireId>10|doajarticles::ae4c7286c79590f19fdca670156ce816</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::0f664bce92ce953e0c7a92068c46bfb3</openaireId> <openaireId>10|doajarticles::0f664bce92ce953e0c7a92068c46bfb3</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::00017183dc4c858fb77541985323a4ef</openaireId> <openaireId>10|doajarticles::00017183dc4c858fb77541985323a4ef</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::93b306f458cce3d7aaaf58c0a725f4f9</openaireId> <openaireId>10|doajarticles::93b306f458cce3d7aaaf58c0a725f4f9</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::9dbf8fbf3e9fe0fe1fc01e55fbd90bfc</openaireId> <openaireId>10|doajarticles::9dbf8fbf3e9fe0fe1fc01e55fbd90bfc</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::a2bda8785c863279bba4b8f34827b4c9</openaireId> <openaireId>10|doajarticles::a2bda8785c863279bba4b8f34827b4c9</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::019a1fcb42c3fea1c1b689df76330b58</openaireId> <openaireId>10|doajarticles::019a1fcb42c3fea1c1b689df76330b58</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::0daa8281938831e9c82bfed8b55a2975</openaireId> <openaireId>10|doajarticles::0daa8281938831e9c82bfed8b55a2975</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::f67ad6d268162079b3abd51a24468744</openaireId> <openaireId>10|doajarticles::f67ad6d268162079b3abd51a24468744</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::c6f0ed5fa41e98863e7c73501fe4bd6d</openaireId> <openaireId>10|doajarticles::c6f0ed5fa41e98863e7c73501fe4bd6d</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::ad114356e196a4a3d84dda59c720dacd</openaireId> <openaireId>10|doajarticles::ad114356e196a4a3d84dda59c720dacd</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::01e8a54fdecaaf354c67a2dd74ae7d4f</openaireId> <openaireId>10|doajarticles::01e8a54fdecaaf354c67a2dd74ae7d4f</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::449305f096b10a9464449ff2d0e10e06</openaireId> <openaireId>10|doajarticles::449305f096b10a9464449ff2d0e10e06</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::982c0c0ac378256254cce2fa6572bb6c</openaireId> <openaireId>10|doajarticles::982c0c0ac378256254cce2fa6572bb6c</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::49d6ed47138884566ce93cf0ccb12c02</openaireId> <openaireId>10|doajarticles::49d6ed47138884566ce93cf0ccb12c02</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::a98e820dbc2e8ee0fc84ab66f263267c</openaireId> <openaireId>10|doajarticles::a98e820dbc2e8ee0fc84ab66f263267c</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::50b1ce37427b36368f8f0f1317e47f83</openaireId> <openaireId>10|doajarticles::50b1ce37427b36368f8f0f1317e47f83</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::f0ec29b7450b2ac5d0ad45327eeb531a</openaireId> <openaireId>10|doajarticles::f0ec29b7450b2ac5d0ad45327eeb531a</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::d8d421d3b0349a7aaa93758b27a54e84</openaireId> <openaireId>10|doajarticles::d8d421d3b0349a7aaa93758b27a54e84</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>doajarticles::7ffc35ac5133da01d421ccf8af5b70bc</openaireId> <openaireId>10|doajarticles::7ffc35ac5133da01d421ccf8af5b70bc</openaireId>
<selcriteria/> <selcriteria/>
</datasource> </datasource>
</datasources> </datasources>
@ -1454,81 +1454,81 @@
</subjects> </subjects>
<datasources> <datasources>
<datasource> <datasource>
<openaireId>opendoar____::358aee4cc897452c00244351e4d91f69</openaireId> <openaireId>10|opendoar____::358aee4cc897452c00244351e4d91f69</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>re3data_____::7b0ad08687b2c960d5aeef06f811d5e6</openaireId> <openaireId>10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>driver______::bee53aa31dc2cbb538c10c2b65fa5824</openaireId> <openaireId>10|driver______::bee53aa31dc2cbb538c10c2b65fa5824</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>openaire____::437f4b072b1aa198adcbc35910ff3b98</openaireId> <openaireId>10|openaire____::437f4b072b1aa198adcbc35910ff3b98</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>openaire____::081b82f96300b6a6e3d282bad31cb6e2</openaireId> <openaireId>10|openaire____::081b82f96300b6a6e3d282bad31cb6e2</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>openaire____::9e3be59865b2c1c335d32dae2fe7b254</openaireId> <openaireId>10|openaire____::9e3be59865b2c1c335d32dae2fe7b254</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::8b6dd7db9af49e67306feb59a8bdc52c</openaireId> <openaireId>10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>share_______::4719356ec8d7d55d3feb384ce879ad6c</openaireId> <openaireId>10|share_______::4719356ec8d7d55d3feb384ce879ad6c</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>share_______::bbd802baad85d1fd440f32a7a3a2c2b1</openaireId> <openaireId>10|share_______::bbd802baad85d1fd440f32a7a3a2c2b1</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>opendoar____::6f4922f45568161a8cdf4ad2299f6d23</openaireId> <openaireId>10|opendoar____::6f4922f45568161a8cdf4ad2299f6d23</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]}, <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]}, {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},
{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]} {"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCoV"}]}]}
</selcriteria> </selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>re3data_____::7980778c78fb4cf0fab13ce2159030dc</openaireId> <openaireId>10|re3data_____::7980778c78fb4cf0fab13ce2159030dc</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCov"}]}]}</selcriteria> <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCov"}]}]}</selcriteria>
</datasource> </datasource>
<datasource> <datasource>
<openaireId>re3data_____::978378def740bbf2bfb420de868c460b</openaireId> <openaireId>10|re3data_____::978378def740bbf2bfb420de868c460b</openaireId>
<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCov"}]}]}</selcriteria> <selcriteria>{"criteria":[{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_caseinsensitive","field":"title","value":"2019-nCov"}]}]}</selcriteria>
</datasource> </datasource>
</datasources> </datasources>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Some files were not shown because too many files have changed in this diff Show More