- Removed ModelConstants and updated Relation enum

This commit is contained in:
Sandro La Bruzzo 2023-05-03 14:03:01 +02:00
parent f8f4b9a018
commit 88fffa6dbd
58 changed files with 306 additions and 312 deletions

View File

@ -161,9 +161,9 @@ public class CreateActionSetSparkJob implements Serializable {
r.setProvenance(PROVENANCE); r.setProvenance(PROVENANCE);
r.setSource(source); r.setSource(source);
r.setTarget(target); r.setTarget(target);
r.setRelType(ModelConstants.RESULT_RESULT); r.setRelType(Relation.RELTYPE.resultResult);
r.setSubRelType(ModelConstants.CITATION); r.setSubRelType(Relation.SUBRELTYPE.citation);
r.setRelClass(ModelConstants.CITES); r.setRelClass(Relation.RELCLASS.Cites);
return r; return r;
} }

View File

@ -648,11 +648,11 @@ object DataciteToOAFTransformation {
(r.relationType.toLowerCase.contains("cite") || r.relationType.toLowerCase.contains("reference")) (r.relationType.toLowerCase.contains("cite") || r.relationType.toLowerCase.contains("reference"))
) )
.map(r => { .map(r => {
r.relationType match { Relation.RELCLASS.valueOf(r.relationType) match {
case Relation.RELCLASS.Cites.toString | Relation.RELCLASS.References.toString => case Relation.RELCLASS.Cites | Relation.RELCLASS.References =>
val target = DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType) val target = DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType)
relation(id, target, Relation.SUBRELTYPE.citation, Relation.RELCLASS.Cites, date) relation(id, target, Relation.SUBRELTYPE.citation, Relation.RELCLASS.Cites, date)
case Relation.RELCLASS.IsCitedBy.toString | Relation.RELCLASS.IsReferencedBy.toString => case Relation.RELCLASS.IsCitedBy | Relation.RELCLASS.IsReferencedBy =>
val source = DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType) val source = DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType)
relation(source, id, Relation.SUBRELTYPE.citation, Relation.RELCLASS.Cites, date) relation(source, id, Relation.SUBRELTYPE.citation, Relation.RELCLASS.Cites, date)
} }

View File

@ -2,13 +2,13 @@ package eu.dnetlib.dhp.sx.bio
import com.google.common.collect.Lists import com.google.common.collect.Lists
import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils}
import eu.dnetlib.dhp.schema.oaf._ import eu.dnetlib.dhp.schema.oaf._
import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils}
import org.json4s.DefaultFormats import org.json4s.DefaultFormats
import org.json4s.JsonAST.{JField, JObject, JString} import org.json4s.JsonAST.{JField, JObject, JString}
import org.json4s.jackson.JsonMethods.{compact, parse, render} import org.json4s.jackson.JsonMethods.{compact, parse, render}
import collection.JavaConverters._ import scala.collection.JavaConverters._
object BioDBToOAF { object BioDBToOAF {
@ -127,8 +127,8 @@ object BioDBToOAF {
target_pid_type, target_pid_type,
generate_unresolved_id(source_pid, source_pid_type), generate_unresolved_id(source_pid, source_pid_type),
collectedFromMap("elsevier"), collectedFromMap("elsevier"),
"relationship", Relation.SUBRELTYPE.relationship,
relation_semantic, Relation.RELCLASS.lookUp(relation_semantic),
date date
) )
@ -323,8 +323,8 @@ object BioDBToOAF {
"pmid", "pmid",
d.getId, d.getId,
collectedFromMap("uniprot"), collectedFromMap("uniprot"),
ModelConstants.RELATIONSHIP, Relation.SUBRELTYPE.relationship,
ModelConstants.IS_RELATED_TO, Relation.RELCLASS.IsRelatedTo,
if (i_date.isDefined) i_date.get.date else null if (i_date.isDefined) i_date.get.date else null
) )
rel.getProvenance.asScala.map(p => p.getCollectedfrom) rel.getProvenance.asScala.map(p => p.getCollectedfrom)
@ -335,8 +335,8 @@ object BioDBToOAF {
"doi", "doi",
d.getId, d.getId,
collectedFromMap("uniprot"), collectedFromMap("uniprot"),
ModelConstants.RELATIONSHIP, Relation.SUBRELTYPE.relationship,
ModelConstants.IS_RELATED_TO, Relation.RELCLASS.IsRelatedTo,
if (i_date.isDefined) i_date.get.date else null if (i_date.isDefined) i_date.get.date else null
) )
List(d, rel) List(d, rel)
@ -353,8 +353,8 @@ object BioDBToOAF {
pidType: String, pidType: String,
sourceId: String, sourceId: String,
collectedFrom: KeyValue, collectedFrom: KeyValue,
subRelType: String, subRelType: Relation.SUBRELTYPE,
relClass: String, relClass: Relation.RELCLASS,
date: String date: String
): Relation = { ): Relation = {
@ -370,7 +370,7 @@ object BioDBToOAF {
rel.setProvenance(provenance) rel.setProvenance(provenance)
rel.setRelType(ModelConstants.RESULT_RESULT) rel.setRelType(Relation.RELTYPE.resultResult)
rel.setSubRelType(subRelType) rel.setSubRelType(subRelType)
rel.setRelClass(relClass) rel.setRelClass(relClass)
@ -398,10 +398,11 @@ object BioDBToOAF {
pidType, pidType,
sourceId, sourceId,
collectedFrom, collectedFrom,
ModelConstants.SUPPLEMENT, Relation.SUBRELTYPE.supplement,
ModelConstants.IS_SUPPLEMENT_TO, Relation.RELCLASS.IsSupplementTo,
date date
) )
} }
def pdbTOOaf(input: String): List[Oaf] = { def pdbTOOaf(input: String): List[Oaf] = {
@ -573,8 +574,8 @@ object BioDBToOAF {
"pmid", "pmid",
d.getId, d.getId,
collectedFromMap("ebi"), collectedFromMap("ebi"),
ModelConstants.RELATIONSHIP, Relation.SUBRELTYPE.relationship,
ModelConstants.IS_RELATED_TO, Relation.RELCLASS.IsRelatedTo,
GraphCleaningFunctions.cleanDate(input.date) GraphCleaningFunctions.cleanDate(input.date)
) )
) )

View File

@ -326,7 +326,7 @@ public class CreateOpenCitationsASTest {
}); });
assertEquals(5, check.filter(r -> r.getSource().equals(doi1)).count()); assertEquals(5, check.filter(r -> r.getSource().equals(doi1)).count());
check.filter(r -> r.getSource().equals(doi1)).foreach(r -> assertEquals(ModelConstants.CITES, r.getRelClass())); check.filter(r -> r.getSource().equals(doi1)).foreach(r -> assertEquals(Relation.RELCLASS.Cites, r.getRelClass()));
} }
} }

View File

@ -65,7 +65,7 @@ public class PrepareGroupsJob {
final Dataset<Relation> mergedRels = ClusterUtils final Dataset<Relation> mergedRels = ClusterUtils
.loadRelations(graphPath, spark) .loadRelations(graphPath, spark)
.filter((FilterFunction<Relation>) r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS)); .filter((FilterFunction<Relation>) r -> r.getRelClass().equals(Relation.RELCLASS.isMergedIn));
final TypedColumn<Tuple2<OaBrokerMainEntity, Relation>, ResultGroup> aggr = new ResultAggregator() final TypedColumn<Tuple2<OaBrokerMainEntity, Relation>, ResultGroup> aggr = new ResultAggregator()
.toColumn(); .toColumn();

View File

@ -68,7 +68,7 @@ public class PrepareRelatedDatasetsJob {
final Dataset<Relation> rels = ClusterUtils final Dataset<Relation> rels = ClusterUtils
.loadRelations(graphPath, spark) .loadRelations(graphPath, spark)
.filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_RESULT)) .filter((FilterFunction<Relation>) r -> r.getRelType().equals(Relation.RELTYPE.resultResult))
.filter((FilterFunction<Relation>) r -> ClusterUtils.isValidResultResultClass(r.getRelClass())) .filter((FilterFunction<Relation>) r -> ClusterUtils.isValidResultResultClass(r.getRelClass()))
.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource())) .filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource()))
.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getTarget())); .filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getTarget()));
@ -78,7 +78,7 @@ public class PrepareRelatedDatasetsJob {
.map((MapFunction<Tuple2<Relation, OaBrokerRelatedDataset>, RelatedDataset>) t -> { .map((MapFunction<Tuple2<Relation, OaBrokerRelatedDataset>, RelatedDataset>) t -> {
final RelatedDataset rel = new RelatedDataset(t._1.getSource(), final RelatedDataset rel = new RelatedDataset(t._1.getSource(),
t._2); t._2);
rel.getRelDataset().setRelType(t._1.getRelClass()); rel.getRelDataset().setRelType(t._1.getRelClass().toString());
return rel; return rel;
}, Encoders.bean(RelatedDataset.class)); }, Encoders.bean(RelatedDataset.class));

View File

@ -68,10 +68,11 @@ public class PrepareRelatedProjectsJob {
(MapFunction<Project, OaBrokerProject>) ConversionUtils::oafProjectToBrokerProject, (MapFunction<Project, OaBrokerProject>) ConversionUtils::oafProjectToBrokerProject,
Encoders.bean(OaBrokerProject.class)); Encoders.bean(OaBrokerProject.class));
final Dataset<Relation> rels = ClusterUtils final Dataset<Relation> rels = ClusterUtils
.loadRelations(graphPath, spark) .loadRelations(graphPath, spark)
.filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_PROJECT)) .filter((FilterFunction<Relation>) r -> r.getRelType().equals(Relation.RELTYPE.resultProject))
.filter((FilterFunction<Relation>) r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS)) .filter((FilterFunction<Relation>) r -> !r.getRelClass().equals(Relation.RELCLASS.isMergedIn))
.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource())) .filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource()))
.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getTarget())); .filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getTarget()));

View File

@ -69,7 +69,7 @@ public class PrepareRelatedPublicationsJob {
final Dataset<Relation> rels = ClusterUtils final Dataset<Relation> rels = ClusterUtils
.loadRelations(graphPath, spark) .loadRelations(graphPath, spark)
.filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_RESULT)) .filter((FilterFunction<Relation>) r -> r.getRelType().equals(Relation.RELTYPE.resultResult))
.filter((FilterFunction<Relation>) r -> ClusterUtils.isValidResultResultClass(r.getRelClass())) .filter((FilterFunction<Relation>) r -> ClusterUtils.isValidResultResultClass(r.getRelClass()))
.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource())) .filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource()))
.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getTarget())); .filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getTarget()));
@ -79,7 +79,7 @@ public class PrepareRelatedPublicationsJob {
.map((MapFunction<Tuple2<Relation, OaBrokerRelatedPublication>, RelatedPublication>) t -> { .map((MapFunction<Tuple2<Relation, OaBrokerRelatedPublication>, RelatedPublication>) t -> {
final RelatedPublication rel = new RelatedPublication( final RelatedPublication rel = new RelatedPublication(
t._1.getSource(), t._2); t._1.getSource(), t._2);
rel.getRelPublication().setRelType(t._1.getRelClass()); rel.getRelPublication().setRelType(t._1.getRelClass().toString());
return rel; return rel;
}, Encoders.bean(RelatedPublication.class)); }, Encoders.bean(RelatedPublication.class));

View File

@ -73,8 +73,8 @@ public class PrepareRelatedSoftwaresJob {
final Dataset<Relation> rels; final Dataset<Relation> rels;
rels = ClusterUtils rels = ClusterUtils
.loadRelations(graphPath, spark) .loadRelations(graphPath, spark)
.filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_RESULT)) .filter((FilterFunction<Relation>) r -> r.getRelType().equals(Relation.RELTYPE.resultResult))
.filter((FilterFunction<Relation>) r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS)) .filter((FilterFunction<Relation>) r -> !r.getRelClass().equals(Relation.RELCLASS.isMergedIn))
.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource())) .filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource()))
.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getTarget())); .filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getTarget()));

View File

@ -2,7 +2,7 @@
package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation;
public class EnrichMissingDatasetIsReferencedBy extends AbstractEnrichMissingDataset { public class EnrichMissingDatasetIsReferencedBy extends AbstractEnrichMissingDataset {
@ -12,7 +12,8 @@ public class EnrichMissingDatasetIsReferencedBy extends AbstractEnrichMissingDat
@Override @Override
protected boolean filterByType(final String relType) { protected boolean filterByType(final String relType) {
return relType.equals(ModelConstants.IS_REFERENCED_BY);
return relType.equals(Relation.RELCLASS.IsReferencedBy);
} }
} }

View File

@ -2,7 +2,7 @@
package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation;
public class EnrichMissingDatasetIsRelatedTo extends AbstractEnrichMissingDataset { public class EnrichMissingDatasetIsRelatedTo extends AbstractEnrichMissingDataset {
@ -12,7 +12,8 @@ public class EnrichMissingDatasetIsRelatedTo extends AbstractEnrichMissingDatase
@Override @Override
protected boolean filterByType(final String relType) { protected boolean filterByType(final String relType) {
return relType.equals(ModelConstants.IS_RELATED_TO);
return relType.equals(Relation.RELCLASS.IsRelatedTo);
} }
} }

View File

@ -2,7 +2,7 @@
package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation;
public class EnrichMissingDatasetIsSupplementedBy extends AbstractEnrichMissingDataset { public class EnrichMissingDatasetIsSupplementedBy extends AbstractEnrichMissingDataset {
@ -12,7 +12,7 @@ public class EnrichMissingDatasetIsSupplementedBy extends AbstractEnrichMissingD
@Override @Override
protected boolean filterByType(final String relType) { protected boolean filterByType(final String relType) {
return relType.equals(ModelConstants.IS_SUPPLEMENTED_BY); return relType.equals(Relation.RELCLASS.IsSupplementedBy);
} }
} }

View File

@ -2,7 +2,7 @@
package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation;
public class EnrichMissingDatasetIsSupplementedTo extends AbstractEnrichMissingDataset { public class EnrichMissingDatasetIsSupplementedTo extends AbstractEnrichMissingDataset {
@ -12,7 +12,8 @@ public class EnrichMissingDatasetIsSupplementedTo extends AbstractEnrichMissingD
@Override @Override
protected boolean filterByType(final String relType) { protected boolean filterByType(final String relType) {
return relType.equals(ModelConstants.IS_SUPPLEMENT_TO);
return relType.equals(Relation.RELCLASS.IsSupplementTo);
} }
} }

View File

@ -2,7 +2,7 @@
package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation;
public class EnrichMissingDatasetReferences extends AbstractEnrichMissingDataset { public class EnrichMissingDatasetReferences extends AbstractEnrichMissingDataset {
@ -12,7 +12,9 @@ public class EnrichMissingDatasetReferences extends AbstractEnrichMissingDataset
@Override @Override
protected boolean filterByType(final String relType) { protected boolean filterByType(final String relType) {
return relType.equals(ModelConstants.REFERENCES);
return relType.equals(Relation.RELCLASS.References);
} }
} }

View File

@ -2,7 +2,7 @@
package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation;
public class EnrichMissingPublicationIsReferencedBy extends AbstractEnrichMissingPublication { public class EnrichMissingPublicationIsReferencedBy extends AbstractEnrichMissingPublication {
@ -12,6 +12,6 @@ public class EnrichMissingPublicationIsReferencedBy extends AbstractEnrichMissin
@Override @Override
protected boolean filterByType(final String relType) { protected boolean filterByType(final String relType) {
return relType.equals(ModelConstants.IS_REFERENCED_BY); return relType.equals(Relation.RELCLASS.IsReferencedBy);
} }
} }

View File

@ -2,7 +2,7 @@
package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation;
public class EnrichMissingPublicationIsRelatedTo extends AbstractEnrichMissingPublication { public class EnrichMissingPublicationIsRelatedTo extends AbstractEnrichMissingPublication {
@ -12,7 +12,7 @@ public class EnrichMissingPublicationIsRelatedTo extends AbstractEnrichMissingPu
@Override @Override
protected boolean filterByType(final String relType) { protected boolean filterByType(final String relType) {
return relType.equals(ModelConstants.IS_RELATED_TO); return relType.equals(Relation.RELCLASS.IsRelatedTo);
} }
} }

View File

@ -2,7 +2,7 @@
package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation;
public class EnrichMissingPublicationIsSupplementedBy extends AbstractEnrichMissingPublication { public class EnrichMissingPublicationIsSupplementedBy extends AbstractEnrichMissingPublication {
@ -12,6 +12,6 @@ public class EnrichMissingPublicationIsSupplementedBy extends AbstractEnrichMiss
@Override @Override
protected boolean filterByType(final String relType) { protected boolean filterByType(final String relType) {
return relType.equals(ModelConstants.IS_SUPPLEMENTED_BY); return relType.equals(Relation.RELCLASS.IsSupplementedBy);
} }
} }

View File

@ -2,7 +2,7 @@
package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation;
public class EnrichMissingPublicationIsSupplementedTo extends AbstractEnrichMissingPublication { public class EnrichMissingPublicationIsSupplementedTo extends AbstractEnrichMissingPublication {
@ -12,7 +12,7 @@ public class EnrichMissingPublicationIsSupplementedTo extends AbstractEnrichMiss
@Override @Override
protected boolean filterByType(final String relType) { protected boolean filterByType(final String relType) {
return relType.equals(ModelConstants.IS_SUPPLEMENT_TO); return relType.equals(Relation.RELCLASS.IsSupplementTo);
} }
} }

View File

@ -2,7 +2,7 @@
package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation;
public class EnrichMissingPublicationReferences extends AbstractEnrichMissingPublication { public class EnrichMissingPublicationReferences extends AbstractEnrichMissingPublication {
@ -12,7 +12,7 @@ public class EnrichMissingPublicationReferences extends AbstractEnrichMissingPub
@Override @Override
protected boolean filterByType(final String relType) { protected boolean filterByType(final String relType) {
return relType.equals(ModelConstants.REFERENCES); return relType.equals(Relation.RELCLASS.References);
} }
} }

View File

@ -16,7 +16,6 @@ public class BrokerConstants {
} }
public static final String OPEN_ACCESS = "OPEN"; public static final String OPEN_ACCESS = "OPEN";
public static final String IS_MERGED_IN_CLASS = ModelConstants.IS_MERGED_IN;
public static final String COLLECTED_FROM_REL = "collectedFrom"; public static final String COLLECTED_FROM_REL = "collectedFrom";

View File

@ -1,10 +1,10 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays; import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.HashSet; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import java.util.Set; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.oaf.Relation;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
@ -13,12 +13,9 @@ import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
import org.apache.spark.util.LongAccumulator; import org.apache.spark.util.LongAccumulator;
import com.fasterxml.jackson.databind.ObjectMapper; import java.util.Arrays;
import java.util.HashSet;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import java.util.Set;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Relation;
public class ClusterUtils { public class ClusterUtils {
@ -59,12 +56,12 @@ public class ClusterUtils {
return id.contains("dedup"); return id.contains("dedup");
} }
public static final boolean isValidResultResultClass(final String s) { public static final boolean isValidResultResultClass(final Relation.RELCLASS r) {
return s.equals(ModelConstants.IS_REFERENCED_BY) return r.equals(Relation.RELCLASS.IsReferencedBy)
|| s.equals(ModelConstants.IS_RELATED_TO) || r.equals(Relation.RELCLASS.References)
|| s.equals(ModelConstants.REFERENCES) || r.equals(Relation.RELCLASS.IsRelatedTo)
|| s.equals(ModelConstants.IS_SUPPLEMENTED_BY) || r.equals(Relation.RELCLASS.IsSupplementTo)
|| s.equals(ModelConstants.IS_SUPPLEMENT_TO); || r.equals(Relation.RELCLASS.IsSupplementedBy);
} }
public static <T> T incrementAccumulator(final T o, final LongAccumulator acc) { public static <T> T incrementAccumulator(final T o, final LongAccumulator acc) {

View File

@ -100,7 +100,7 @@ public class ConversionUtils {
res.setOpenaireId(cleanOpenaireId(result.getId())); res.setOpenaireId(cleanOpenaireId(result.getId()));
res.setOriginalId(first(result.getOriginalId())); res.setOriginalId(first(result.getOriginalId()));
res.setTypology(result.getResulttype()); res.setTypology(result.getResulttype().toString());
res.setTitles(structPropList(result.getTitle())); res.setTitles(structPropList(result.getTitle()));
res.setAbstracts(result.getDescription()); res.setAbstracts(result.getDescription());
res.setLanguage(classId(result.getLanguage())); res.setLanguage(classId(result.getLanguage()));
@ -112,7 +112,7 @@ public class ConversionUtils {
res.setContributor(result.getContributor()); res.setContributor(result.getContributor());
res res
.setJournal( .setJournal(
result instanceof Publication ? oafJournalToBrokerJournal(((Publication) result).getJournal()) : null); result instanceof Publication ? oafJournalToBrokerJournal(result.getJournal()) : null);
res.setPids(allResultPids(result)); res.setPids(allResultPids(result));
res.setInstances(flatMappedList(result.getInstance(), ConversionUtils::oafInstanceToBrokerInstances)); res.setInstances(flatMappedList(result.getInstance(), ConversionUtils::oafInstanceToBrokerInstances));
res res

View File

@ -153,10 +153,12 @@ abstract class AbstractSparkAction implements Serializable {
} }
private boolean isOpenOrgsDedupMergeRelation(Relation rel) { private boolean isOpenOrgsDedupMergeRelation(Relation rel) {
return ModelConstants.ORG_ORG_RELTYPE.equals(rel.getRelType()) &&
ModelConstants.DEDUP.equals(rel.getSubRelType())
&& (ModelConstants.IS_MERGED_IN.equals(rel.getRelClass()) || return Relation.RELTYPE.organizationOrganization.equals(rel.getRelType()) &&
ModelConstants.MERGES.equals(rel.getRelClass())); Relation.SUBRELTYPE.dedup.equals(rel.getSubRelType())
&& (Relation.RELCLASS.isMergedIn.equals(rel.getRelClass()) ||
Relation.RELCLASS.merges.equals(rel.getRelClass()));
} }
protected static Boolean parseECField(String field) { protected static Boolean parseECField(String field) {

View File

@ -157,17 +157,18 @@ public class DedupUtility {
public static Relation createSimRel(String source, String target, String entity) { public static Relation createSimRel(String source, String target, String entity) {
final Relation r = new Relation(); final Relation r = new Relation();
r.setSource(source); r.setSource(source);
r.setTarget(target); r.setTarget(target);
r.setSubRelType("dedupSimilarity"); r.setSubRelType(Relation.SUBRELTYPE.dedup);
r.setRelClass(ModelConstants.IS_SIMILAR_TO); r.setRelClass(Relation.RELCLASS.isSimilarTo);
switch (entity) { switch (entity) {
case "result": case "result":
r.setRelType(ModelConstants.RESULT_RESULT); r.setRelType(Relation.RELTYPE.resultResult);
break; break;
case "organization": case "organization":
r.setRelType(ModelConstants.ORG_ORG_RELTYPE); r.setRelType(Relation.RELTYPE.organizationOrganization);
break; break;
default: default:
throw new IllegalArgumentException("unmanaged entity type: " + entity); throw new IllegalArgumentException("unmanaged entity type: " + entity);

View File

@ -90,9 +90,9 @@ public class SparkCopyOpenorgsMergeRels extends AbstractSparkAction {
} }
private boolean isMergeRel(Relation rel) { private boolean isMergeRel(Relation rel) {
return (rel.getRelClass().equals(ModelConstants.MERGES) return (rel.getRelClass().equals(Relation.RELCLASS.merges)
|| rel.getRelClass().equals(ModelConstants.IS_MERGED_IN)) || rel.getRelClass().equals(Relation.RELCLASS.isMergedIn))
&& rel.getRelType().equals(ModelConstants.ORG_ORG_RELTYPE) && rel.getRelType().equals(Relation.RELTYPE.organizationOrganization)
&& rel.getSubRelType().equals(ModelConstants.DEDUP); && rel.getSubRelType().equals(Relation.SUBRELTYPE.dedup);
} }
} }

View File

@ -81,9 +81,9 @@ public class SparkCopyOpenorgsSimRels extends AbstractSparkAction {
} }
private boolean filterOpenorgsRels(Relation rel) { private boolean filterOpenorgsRels(Relation rel) {
return rel.getRelClass().equals(ModelConstants.IS_SIMILAR_TO) return rel.getRelClass().equals(Relation.RELCLASS.isSimilarTo)
&& rel.getRelType().equals(ModelConstants.ORG_ORG_RELTYPE) && rel.getRelType().equals(Relation.RELTYPE.organizationOrganization)
&& rel.getSubRelType().equals(ModelConstants.DEDUP) && isOpenorgs(rel); && rel.getSubRelType().equals(Relation.SUBRELTYPE.dedup) && isOpenorgs(rel);
} }
} }

View File

@ -110,7 +110,7 @@ public class SparkCreateMergeRels extends AbstractSparkAction {
.load(DedupUtility.createSimRelPath(workingPath, actionSetId, subEntity)) .load(DedupUtility.createSimRelPath(workingPath, actionSetId, subEntity))
.as(Encoders.bean(Relation.class)) .as(Encoders.bean(Relation.class))
.javaRDD() .javaRDD()
.map(it -> new Edge<>(hash(it.getSource()), hash(it.getTarget()), it.getRelClass())) .map(it -> new Edge<>(hash(it.getSource()), hash(it.getTarget()), it.getRelClass().toString()))
.rdd(); .rdd();
Dataset<Tuple2<String, String>> rawMergeRels = spark Dataset<Tuple2<String, String>> rawMergeRels = spark
@ -199,14 +199,15 @@ public class SparkCreateMergeRels extends AbstractSparkAction {
id -> { id -> {
List<Relation> rels = new ArrayList<>(); List<Relation> rels = new ArrayList<>();
rels.add(rel(cc.getCcId(), id, ModelConstants.MERGES, dedupConf)); rels.add(rel(cc.getCcId(), id, Relation.RELCLASS.merges, dedupConf));
return rels.stream(); return rels.stream();
}) })
.iterator(); .iterator();
} }
private Relation rel(String source, String target, String relClass, DedupConfig dedupConf) { // TODO: review this function; this utility should be moved to a support class or replaced by an existing helper
private Relation rel(String source, String target, Relation.RELCLASS relClass, DedupConfig dedupConf) {
String entityType = dedupConf.getWf().getEntityType(); String entityType = dedupConf.getWf().getEntityType();
@ -214,8 +215,8 @@ public class SparkCreateMergeRels extends AbstractSparkAction {
r.setSource(source); r.setSource(source);
r.setTarget(target); r.setTarget(target);
r.setRelClass(relClass); r.setRelClass(relClass);
r.setRelType(entityType + entityType.substring(0, 1).toUpperCase() + entityType.substring(1)); r.setRelType(Relation.RELTYPE.valueOf(entityType + entityType.substring(0, 1).toUpperCase() + entityType.substring(1)));
r.setSubRelType(ModelConstants.DEDUP); r.setSubRelType(Relation.SUBRELTYPE.dedup);
DataInfo info = new DataInfo(); DataInfo info = new DataInfo();

View File

@ -191,15 +191,15 @@ public class SparkPrepareNewOrgs extends AbstractSparkAction {
switch (entityType) { switch (entityType) {
case "result": case "result":
if (rel.getRelClass().equals(ModelConstants.IS_DIFFERENT_FROM) if (rel.getRelClass().equals(Relation.RELCLASS.isDifferentFrom)
&& rel.getRelType().equals(ModelConstants.RESULT_RESULT) && rel.getRelType().equals(Relation.RELTYPE.resultResult)
&& rel.getSubRelType().equals(ModelConstants.DEDUP)) && rel.getSubRelType().equals(Relation.SUBRELTYPE.dedup))
return true; return true;
break; break;
case "organization": case "organization":
if (rel.getRelClass().equals(ModelConstants.IS_DIFFERENT_FROM) if (rel.getRelClass().equals(Relation.RELCLASS.isDifferentFrom)
&& rel.getRelType().equals(ModelConstants.ORG_ORG_RELTYPE) && rel.getRelType().equals(Relation.RELTYPE.organizationOrganization)
&& rel.getSubRelType().equals(ModelConstants.DEDUP)) && rel.getSubRelType().equals(Relation.SUBRELTYPE.dedup))
return true; return true;
break; break;
default: default:

View File

@ -106,15 +106,15 @@ public class SparkPrepareOrgRels extends AbstractSparkAction {
switch (entityType) { switch (entityType) {
case "result": case "result":
if (rel.getRelClass().equals(ModelConstants.IS_DIFFERENT_FROM) if (rel.getRelClass().equals(Relation.RELCLASS.isDifferentFrom)
&& rel.getRelType().equals(ModelConstants.RESULT_RESULT) && rel.getRelType().equals(Relation.RELTYPE.resultResult)
&& rel.getSubRelType().equals(ModelConstants.DEDUP)) && rel.getSubRelType().equals(Relation.SUBRELTYPE.dedup))
return true; return true;
break; break;
case "organization": case "organization":
if (rel.getRelClass().equals(ModelConstants.IS_DIFFERENT_FROM) if (rel.getRelClass().equals(Relation.RELCLASS.isDifferentFrom)
&& rel.getRelType().equals(ModelConstants.ORG_ORG_RELTYPE) && rel.getRelType().equals(Relation.RELTYPE.organizationOrganization)
&& rel.getSubRelType().equals(ModelConstants.DEDUP)) && rel.getSubRelType().equals(Relation.SUBRELTYPE.dedup))
return true; return true;
break; break;
default: default:

View File

@ -74,7 +74,7 @@ public class SparkPropagateRelation extends AbstractSparkAction {
// <mergedObjectID, dedupID> // <mergedObjectID, dedupID>
Dataset<Tuple2<String, String>> mergedIds = mergeRels Dataset<Tuple2<String, String>> mergedIds = mergeRels
.where(col("relClass").equalTo(ModelConstants.MERGES)) .where(col("relClass").equalTo(Relation.RELCLASS.merges))
.select(col("source"), col("target")) .select(col("source"), col("target"))
.distinct() .distinct()
.map( .map(
@ -111,7 +111,7 @@ public class SparkPropagateRelation extends AbstractSparkAction {
.filter(getRelationFilterFunction()) .filter(getRelationFilterFunction())
.groupByKey( .groupByKey(
(MapFunction<Relation, String>) r -> String (MapFunction<Relation, String>) r -> String
.join(r.getSource(), r.getTarget(), r.getRelType(), r.getSubRelType(), r.getRelClass()), .join(r.getSource(), r.getTarget(), r.getRelType().toString(), r.getSubRelType().toString(), r.getRelClass().toString()),
Encoders.STRING()) Encoders.STRING())
.agg(new RelationAggregator().toColumn()) .agg(new RelationAggregator().toColumn())
.map((MapFunction<Tuple2<String, Relation>, Relation>) Tuple2::_2, Encoders.bean(Relation.class)); .map((MapFunction<Tuple2<String, Relation>, Relation>) Tuple2::_2, Encoders.bean(Relation.class));
@ -150,9 +150,9 @@ public class SparkPropagateRelation extends AbstractSparkAction {
private FilterFunction<Relation> getRelationFilterFunction() { private FilterFunction<Relation> getRelationFilterFunction() {
return r -> StringUtils.isNotBlank(r.getSource()) || return r -> StringUtils.isNotBlank(r.getSource()) ||
StringUtils.isNotBlank(r.getTarget()) || StringUtils.isNotBlank(r.getTarget()) ||
StringUtils.isNotBlank(r.getRelType()) || r.getRelType() != null ||
StringUtils.isNotBlank(r.getSubRelType()) || r.getSubRelType()!=null ||
StringUtils.isNotBlank(r.getRelClass()); r.getRelClass()!=null;
} }
private static String getId(Relation r, FieldType type) { private static String getId(Relation r, FieldType type) {

View File

@ -415,9 +415,9 @@ public class SparkDedupTest implements Serializable {
"50|doi_________::d5021b53204e4fdeab6ff5d5bc468032", "50|doi_________::d5021b53204e4fdeab6ff5d5bc468032",
"50|arXiv_______::c93aeb433eb90ed7a86e29be00791b7c"); "50|arXiv_______::c93aeb433eb90ed7a86e29be00791b7c");
merges.forEach(r -> { merges.forEach(r -> {
assertEquals(ModelConstants.RESULT_RESULT, r.getRelType()); assertEquals(Relation.RELTYPE.resultResult, r.getRelType());
assertEquals(ModelConstants.DEDUP, r.getSubRelType()); assertEquals(Relation.SUBRELTYPE.dedup, r.getSubRelType());
assertEquals(ModelConstants.MERGES, r.getRelClass()); assertEquals(Relation.RELCLASS.merges, r.getRelClass());
assertTrue(dups.contains(r.getTarget())); assertTrue(dups.contains(r.getTarget()));
}); });
@ -426,9 +426,9 @@ public class SparkDedupTest implements Serializable {
.collectAsList(); .collectAsList();
assertEquals(3, mergedIn.size()); assertEquals(3, mergedIn.size());
mergedIn.forEach(r -> { mergedIn.forEach(r -> {
assertEquals(ModelConstants.RESULT_RESULT, r.getRelType()); assertEquals(Relation.RELTYPE.resultResult, r.getRelType());
assertEquals(ModelConstants.DEDUP, r.getSubRelType()); assertEquals(Relation.SUBRELTYPE.dedup, r.getSubRelType());
assertEquals(ModelConstants.IS_MERGED_IN, r.getRelClass()); assertEquals(Relation.RELCLASS.isMergedIn, r.getRelClass());
assertTrue(dups.contains(r.getSource())); assertTrue(dups.contains(r.getSource()));
}); });

View File

@ -190,9 +190,9 @@ public class SparkPublicationRootsTest implements Serializable {
"50|doi_________::d5021b53204e4fdeab6ff5d5bc468032", "50|doi_________::d5021b53204e4fdeab6ff5d5bc468032",
"50|arXiv_______::c93aeb433eb90ed7a86e29be00791b7c"); "50|arXiv_______::c93aeb433eb90ed7a86e29be00791b7c");
mergeList.forEach(r -> { mergeList.forEach(r -> {
assertEquals(ModelConstants.RESULT_RESULT, r.getRelType()); assertEquals(Relation.RELTYPE.resultResult, r.getRelType());
assertEquals(ModelConstants.DEDUP, r.getSubRelType()); assertEquals(Relation.SUBRELTYPE.dedup, r.getSubRelType());
assertEquals(ModelConstants.MERGES, r.getRelClass()); assertEquals(Relation.RELCLASS.merges, r.getRelClass());
assertTrue(dups.contains(r.getTarget())); assertTrue(dups.contains(r.getTarget()));
}); });
@ -201,9 +201,9 @@ public class SparkPublicationRootsTest implements Serializable {
.collectAsList(); .collectAsList();
assertEquals(3, mergedIn.size()); assertEquals(3, mergedIn.size());
mergedIn.forEach(r -> { mergedIn.forEach(r -> {
assertEquals(ModelConstants.RESULT_RESULT, r.getRelType()); assertEquals(Relation.RELTYPE.resultResult, r.getRelType());
assertEquals(ModelConstants.DEDUP, r.getSubRelType()); assertEquals(Relation.SUBRELTYPE.dedup, r.getSubRelType());
assertEquals(ModelConstants.IS_MERGED_IN, r.getRelClass()); assertEquals(Relation.RELCLASS.isMergedIn, r.getRelClass());
assertTrue(dups.contains(r.getSource())); assertTrue(dups.contains(r.getSource()));
}); });

View File

@ -384,9 +384,9 @@ case object Crossref2Oaf {
val rel = new Relation val rel = new Relation
rel.setSource(sourceId) rel.setSource(sourceId)
rel.setTarget(targetId) rel.setTarget(targetId)
rel.setRelType(ModelConstants.RESULT_RESULT) rel.setRelType(Relation.RELTYPE.resultResult)
rel.setRelClass(ModelConstants.CITES) rel.setRelClass(Relation.RELCLASS.Cites)
rel.setSubRelType(ModelConstants.CITATION) rel.setSubRelType(Relation.SUBRELTYPE.citation)
rel.setProvenance(Lists.newArrayList(OafMapperUtils.getProvenance(collectedFrom, dataInfo))) rel.setProvenance(Lists.newArrayList(OafMapperUtils.getProvenance(collectedFrom, dataInfo)))
List(rel) List(rel)
@ -417,14 +417,14 @@ case object Crossref2Oaf {
null null
} }
def generateRelation(sourceId: String, targetId: String, relClass: String): Relation = { def generateRelation(sourceId: String, targetId: String, relClass: Relation.RELCLASS): Relation = {
val r = new Relation val r = new Relation
r.setSource(sourceId) r.setSource(sourceId)
r.setTarget(targetId) r.setTarget(targetId)
r.setRelType(ModelConstants.RESULT_PROJECT) r.setRelType(Relation.RELTYPE.resultProject)
r.setRelClass(relClass) r.setRelClass(relClass)
r.setSubRelType(ModelConstants.OUTCOME) r.setSubRelType(Relation.SUBRELTYPE.outcome)
r.setProvenance(Lists.newArrayList(OafMapperUtils.getProvenance(collectedFrom, dataInfo))) r.setProvenance(Lists.newArrayList(OafMapperUtils.getProvenance(collectedFrom, dataInfo)))
r r
@ -435,14 +435,15 @@ case object Crossref2Oaf {
nsPrefix: String, nsPrefix: String,
extractField: String => String extractField: String => String
): Unit = { ): Unit = {
if (funder.award.isDefined && funder.award.get.nonEmpty) if (funder.award.isDefined && funder.award.get.nonEmpty)
funder.award.get funder.award.get
.map(extractField) .map(extractField)
.filter(a => a != null && a.nonEmpty) .filter(a => a != null && a.nonEmpty)
.foreach(award => { .foreach(award => {
val targetId = getProjectId(nsPrefix, DHPUtils.md5(award)) val targetId = getProjectId(nsPrefix, DHPUtils.md5(award))
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) queue += generateRelation(sourceId, targetId, Relation.RELCLASS.isProducedBy)
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) queue += generateRelation(targetId, sourceId, Relation.RELCLASS.produces)
}) })
} }
@ -471,21 +472,21 @@ case object Crossref2Oaf {
case "10.13039/501100000923" => generateSimpleRelationFromAward(funder, "arc_________", a => a) case "10.13039/501100000923" => generateSimpleRelationFromAward(funder, "arc_________", a => a)
case "10.13039/501100000038" => case "10.13039/501100000038" =>
val targetId = getProjectId("nserc_______", "1e5e62235d094afd01cd56e65112fc63") val targetId = getProjectId("nserc_______", "1e5e62235d094afd01cd56e65112fc63")
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) queue += generateRelation(sourceId, targetId, Relation.RELCLASS.isProducedBy)
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) queue += generateRelation(targetId, sourceId, Relation.RELCLASS.produces)
case "10.13039/501100000155" => case "10.13039/501100000155" =>
val targetId = getProjectId("sshrc_______", "1e5e62235d094afd01cd56e65112fc63") val targetId = getProjectId("sshrc_______", "1e5e62235d094afd01cd56e65112fc63")
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) queue += generateRelation(sourceId, targetId, Relation.RELCLASS.isProducedBy)
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) queue += generateRelation(targetId, sourceId, Relation.RELCLASS.produces)
case "10.13039/501100000024" => case "10.13039/501100000024" =>
val targetId = getProjectId("cihr________", "1e5e62235d094afd01cd56e65112fc63") val targetId = getProjectId("cihr________", "1e5e62235d094afd01cd56e65112fc63")
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) queue += generateRelation(sourceId, targetId, Relation.RELCLASS.isProducedBy)
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) queue += generateRelation(targetId, sourceId, Relation.RELCLASS.produces)
case "10.13039/100020031" => case "10.13039/100020031" =>
val targetId = getProjectId("tara________", "1e5e62235d094afd01cd56e65112fc63") val targetId = getProjectId("tara________", "1e5e62235d094afd01cd56e65112fc63")
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) queue += generateRelation(sourceId, targetId, Relation.RELCLASS.isProducedBy)
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) queue += generateRelation(targetId, sourceId, Relation.RELCLASS.produces)
case "10.13039/501100005416" => generateSimpleRelationFromAward(funder, "rcn_________", a => a) case "10.13039/501100005416" => generateSimpleRelationFromAward(funder, "rcn_________", a => a)
case "10.13039/501100002848" => generateSimpleRelationFromAward(funder, "conicytf____", a => a) case "10.13039/501100002848" => generateSimpleRelationFromAward(funder, "conicytf____", a => a)
@ -495,8 +496,8 @@ case object Crossref2Oaf {
case "10.13039/501100003407" => case "10.13039/501100003407" =>
generateSimpleRelationFromAward(funder, "miur________", a => a) generateSimpleRelationFromAward(funder, "miur________", a => a)
val targetId = getProjectId("miur________", "1e5e62235d094afd01cd56e65112fc63") val targetId = getProjectId("miur________", "1e5e62235d094afd01cd56e65112fc63")
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) queue += generateRelation(sourceId, targetId, Relation.RELCLASS.isProducedBy)
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) queue += generateRelation(targetId, sourceId, Relation.RELCLASS.produces)
case "10.13039/501100006588" | "10.13039/501100004488" => case "10.13039/501100006588" | "10.13039/501100004488" =>
generateSimpleRelationFromAward( generateSimpleRelationFromAward(
funder, funder,
@ -509,15 +510,15 @@ case object Crossref2Oaf {
case "10.13039/100004440" => case "10.13039/100004440" =>
generateSimpleRelationFromAward(funder, "wt__________", a => a) generateSimpleRelationFromAward(funder, "wt__________", a => a)
val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63") val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63")
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) queue += generateRelation(sourceId, targetId, Relation.RELCLASS.isProducedBy)
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) queue += generateRelation(targetId, sourceId, Relation.RELCLASS.produces)
//ASAP //ASAP
case "10.13039/100018231" => generateSimpleRelationFromAward(funder, "asap________", a => a) case "10.13039/100018231" => generateSimpleRelationFromAward(funder, "asap________", a => a)
//CHIST-ERA //CHIST-ERA
case "10.13039/501100001942" => case "10.13039/501100001942" =>
val targetId = getProjectId("chistera____", "1e5e62235d094afd01cd56e65112fc63") val targetId = getProjectId("chistera____", "1e5e62235d094afd01cd56e65112fc63")
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) queue += generateRelation(sourceId, targetId, Relation.RELCLASS.isProducedBy)
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) queue += generateRelation(targetId, sourceId, Relation.RELCLASS.produces)
//HE //HE
case "10.13039/100018693" | "10.13039/100018694" | "10.13039/100019188" | "10.13039/100019180" | case "10.13039/100018693" | "10.13039/100018694" | "10.13039/100019188" | "10.13039/100019180" |
"10.13039/100018695" | "10.13039/100019185" | "10.13039/100019186" | "10.13039/100019187" => "10.13039/100018695" | "10.13039/100019185" | "10.13039/100019186" | "10.13039/100019187" =>
@ -559,8 +560,8 @@ case object Crossref2Oaf {
case "Wellcome Trust Masters Fellowship" => case "Wellcome Trust Masters Fellowship" =>
generateSimpleRelationFromAward(funder, "wt__________", a => a) generateSimpleRelationFromAward(funder, "wt__________", a => a)
val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63") val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63")
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) queue += generateRelation(sourceId, targetId, Relation.RELCLASS.isProducedBy)
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) queue += generateRelation(targetId, sourceId, Relation.RELCLASS.produces)
case _ => logger.debug("no match for " + funder.name) case _ => logger.debug("no match for " + funder.name)
} }

View File

@ -144,7 +144,7 @@ class CrossrefMappingTest {
val relationList: List[Relation] = result val relationList: List[Relation] = result
.filter(s => s.isInstanceOf[Relation]) .filter(s => s.isInstanceOf[Relation])
.map(r => r.asInstanceOf[Relation]) .map(r => r.asInstanceOf[Relation])
.filter(r => r.getSubRelType.equalsIgnoreCase(ModelConstants.CITATION)) .filter(r => r.getSubRelType.equalsIgnoreCase(Relation.SUBRELTYPE.citation))
assertNotNull(relationList) assertNotNull(relationList)
assertFalse(relationList.isEmpty) assertFalse(relationList.isEmpty)

View File

@ -112,15 +112,16 @@ public class PropagationConstant {
String className String className
) { ) {
ArrayList<Relation> newRelations = new ArrayList(); ArrayList<Relation> newRelations = new ArrayList();
newRelations newRelations
.add( .add(
getRelation( getRelation(
orgId, orgId,
resultId, resultId,
ModelConstants.IS_AUTHOR_INSTITUTION_OF, Relation.RELCLASS.isAuthorInstitutionOf,
ModelConstants.RESULT_ORGANIZATION, Relation.RELTYPE.resultOrganization,
ModelConstants.AFFILIATION, Relation.SUBRELTYPE.affiliation,
PROPAGATION_DATA_INFO_TYPE, PROPAGATION_DATA_INFO_TYPE,
classID, classID,
className)); className));
@ -129,9 +130,9 @@ public class PropagationConstant {
getRelation( getRelation(
resultId, resultId,
orgId, orgId,
ModelConstants.HAS_AUTHOR_INSTITUTION, Relation.RELCLASS.hasAuthorInstitution,
ModelConstants.RESULT_ORGANIZATION, Relation.RELTYPE.resultOrganization,
ModelConstants.AFFILIATION, Relation.SUBRELTYPE.affiliation,
PROPAGATION_DATA_INFO_TYPE, PROPAGATION_DATA_INFO_TYPE,
classID, classID,
className)); className));
@ -142,9 +143,9 @@ public class PropagationConstant {
public static Relation getRelation( public static Relation getRelation(
String source, String source,
String target, String target,
String rel_class, Relation.RELCLASS rel_class,
String rel_type, Relation.RELTYPE rel_type,
String subrel_type, Relation.SUBRELTYPE subrel_type,
String inference_provenance, String inference_provenance,
String inference_class_id, String inference_class_id,
String inference_class_name) { String inference_class_name) {

View File

@ -96,7 +96,7 @@ public class PrepareDatasourceCountryAssociation {
// filtering of the relations taking the non deleted by inference and those with IsProvidedBy as relclass // filtering of the relations taking the non deleted by inference and those with IsProvidedBy as relclass
Dataset<Relation> relation = readPath(spark, inputPath + "/relation", Relation.class) Dataset<Relation> relation = readPath(spark, inputPath + "/relation", Relation.class)
.filter( .filter(
(FilterFunction<Relation>) rel -> rel.getRelClass().equalsIgnoreCase(ModelConstants.IS_PROVIDED_BY)); (FilterFunction<Relation>) rel -> rel.getRelClass() == Relation.RELCLASS.isProvidedBy);
// filtering of the organization taking only the non deleted by inference and those with information about the // filtering of the organization taking only the non deleted by inference and those with information about the
// country // country

View File

@ -81,7 +81,7 @@ public class PrepareProjectResultsAssociation {
+ " FROM relation " + " FROM relation "
+ " WHERE datainfo.deletedbyinference = false " + " WHERE datainfo.deletedbyinference = false "
+ " AND lower(relClass) = '" + " AND lower(relClass) = '"
+ ModelConstants.IS_PRODUCED_BY.toLowerCase() + Relation.RELCLASS.isProducedBy.toString().toLowerCase()
+ "'"; + "'";
Dataset<Row> resproj_relation = spark.sql(resproj_relation_query); Dataset<Row> resproj_relation = spark.sql(resproj_relation_query);

View File

@ -112,9 +112,9 @@ public class SparkResultToProjectThroughSemRelJob {
getRelation( getRelation(
projectId, projectId,
resId, resId,
ModelConstants.PRODUCES, Relation.RELCLASS.produces,
ModelConstants.RESULT_PROJECT, Relation.RELTYPE.resultProject,
ModelConstants.OUTCOME, Relation.SUBRELTYPE.outcome,
PROPAGATION_DATA_INFO_TYPE, PROPAGATION_DATA_INFO_TYPE,
PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID, PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID,
PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME))); PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME)));

View File

@ -78,13 +78,13 @@ public class PrepareResultCommunitySet {
+ " FROM relation " + " FROM relation "
+ " WHERE datainfo.deletedbyinference = false " + " WHERE datainfo.deletedbyinference = false "
+ " AND lower(relClass) = '" + " AND lower(relClass) = '"
+ ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase() + Relation.RELCLASS.hasAuthorInstitution.toString().toLowerCase()
+ "') result_organization " + "') result_organization "
+ "LEFT JOIN (SELECT source, collect_set(target) org_set " + "LEFT JOIN (SELECT source, collect_set(target) org_set "
+ " FROM relation " + " FROM relation "
+ " WHERE datainfo.deletedbyinference = false " + " WHERE datainfo.deletedbyinference = false "
+ " AND lower(relClass) = '" + " AND lower(relClass) = '"
+ ModelConstants.MERGES.toLowerCase() + Relation.RELCLASS.merges.toString().toLowerCase()
+ "' " + "' "
+ " GROUP BY source) organization_organization " + " GROUP BY source) organization_organization "
+ "ON result_organization.target = organization_organization.source "; + "ON result_organization.target = organization_organization.source ";

View File

@ -109,7 +109,7 @@ public class PrepareResultInstRepoAssociation {
+ "JOIN ( SELECT source, target " + "JOIN ( SELECT source, target "
+ "FROM relation " + "FROM relation "
+ "WHERE lower(relclass) = '" + "WHERE lower(relclass) = '"
+ ModelConstants.IS_PROVIDED_BY.toLowerCase() + Relation.RELCLASS.isProvidedBy.toString().toLowerCase()
+ "' " + "' "
+ "AND datainfo.deletedbyinference = false ) rel " + "AND datainfo.deletedbyinference = false ) rel "
+ "ON d.id = rel.source "; + "ON d.id = rel.source ";
@ -129,7 +129,7 @@ public class PrepareResultInstRepoAssociation {
+ "from relation " + "from relation "
+ "where datainfo.deletedbyinference = false " + "where datainfo.deletedbyinference = false "
+ "and lower(relClass) = '" + "and lower(relClass) = '"
+ ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase() + Relation.RELCLASS.hasAuthorInstitution.toString().toLowerCase()
+ "' " + "' "
+ "group by source"; + "group by source";

View File

@ -142,9 +142,9 @@ public class SparkResultToOrganizationFromIstRepoJob {
.add( .add(
getRelation( getRelation(
resultId, orgId, resultId, orgId,
ModelConstants.HAS_AUTHOR_INSTITUTION, Relation.RELCLASS.hasAuthorInstitution,
ModelConstants.RESULT_ORGANIZATION, Relation.RELTYPE.resultOrganization,
ModelConstants.AFFILIATION, PROPAGATION_DATA_INFO_TYPE, Relation.SUBRELTYPE.affiliation, PROPAGATION_DATA_INFO_TYPE,
PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID, PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID,
PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME)) PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME))

View File

@ -36,14 +36,14 @@ public class PrepareInfo implements Serializable {
private static final String ORGANIZATION_ORGANIZATION_QUERY = "SELECT target key, collect_set(source) as valueSet " private static final String ORGANIZATION_ORGANIZATION_QUERY = "SELECT target key, collect_set(source) as valueSet "
+ +
"FROM relation " + "FROM relation " +
"WHERE lower(relclass) = '" + ModelConstants.IS_PARENT_OF.toLowerCase() + "WHERE lower(relclass) = '" + Relation.RELCLASS.IsParentOf.toString().toLowerCase() +
"' and datainfo.deletedbyinference = false " + "' and datainfo.deletedbyinference = false " +
"GROUP BY target"; "GROUP BY target";
// associates results with all the orgs they are affiliated to // associates results with all the orgs they are affiliated to
private static final String RESULT_ORGANIZATION_QUERY = "SELECT source key, collect_set(target) as valueSet " + private static final String RESULT_ORGANIZATION_QUERY = "SELECT source key, collect_set(target) as valueSet " +
"FROM relation " + "FROM relation " +
"WHERE lower(relclass) = '" + ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase() + "WHERE lower(relclass) = '" + Relation.RELCLASS.hasAuthorInstitution.toString().toLowerCase() +
"' and datainfo.deletedbyinference = false " + "' and datainfo.deletedbyinference = false " +
"GROUP BY source"; "GROUP BY source";
@ -115,7 +115,7 @@ public class PrepareInfo implements Serializable {
relation relation
.filter( .filter(
(FilterFunction<Relation>) r -> r.getRelClass().equals(ModelConstants.HAS_AUTHOR_INSTITUTION)) (FilterFunction<Relation>) r -> r.getRelClass().equals(Relation.RELCLASS.hasAuthorInstitution))
.write() .write()
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.option("compression", "gzip") .option("compression", "gzip")
@ -124,14 +124,14 @@ public class PrepareInfo implements Serializable {
Dataset<String> children = spark Dataset<String> children = spark
.sql( .sql(
"Select distinct target as child from relation where " + "Select distinct target as child from relation where " +
"lower(relclass)='" + ModelConstants.IS_PARENT_OF.toLowerCase() + "lower(relclass)='" + Relation.RELCLASS.IsParentOf.toString().toLowerCase() +
"' and datainfo.deletedbyinference = false") "' and datainfo.deletedbyinference = false")
.as(Encoders.STRING()); .as(Encoders.STRING());
Dataset<String> parent = spark Dataset<String> parent = spark
.sql( .sql(
"Select distinct source as parent from relation " + "Select distinct source as parent from relation " +
"where lower(relclass)='" + ModelConstants.IS_PARENT_OF.toLowerCase() + "where lower(relclass)='" + Relation.RELCLASS.IsParentOf.toString().toLowerCase() +
"' and datainfo.deletedbyinference = false") "' and datainfo.deletedbyinference = false")
.as(Encoders.STRING()); .as(Encoders.STRING());

View File

@ -193,9 +193,9 @@ public class StepActions implements Serializable {
orgId -> getRelation( orgId -> getRelation(
v.getKey(), v.getKey(),
orgId, orgId,
ModelConstants.HAS_AUTHOR_INSTITUTION, Relation.RELCLASS.hasAuthorInstitution,
ModelConstants.RESULT_ORGANIZATION, Relation.RELTYPE.resultOrganization,
ModelConstants.AFFILIATION, Relation.SUBRELTYPE.affiliation,
PROPAGATION_DATA_INFO_TYPE, PROPAGATION_DATA_INFO_TYPE,
PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID, PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID,
PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME)) PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME))

View File

@ -119,8 +119,8 @@ public class SparkJobTest {
tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
Assertions.assertEquals(18, tmp.count()); Assertions.assertEquals(18, tmp.count());
tmp.foreach(r -> Assertions.assertEquals(ModelConstants.AFFILIATION, r.getSubRelType())); tmp.foreach(r -> Assertions.assertEquals(Relation.SUBRELTYPE.affiliation, r.getSubRelType()));
tmp.foreach(r -> Assertions.assertEquals(ModelConstants.RESULT_ORGANIZATION, r.getRelType())); tmp.foreach(r -> Assertions.assertEquals(Relation.RELTYPE.resultOrganization, r.getRelType()));
tmp tmp
.foreach( .foreach(
r -> Assertions r -> Assertions
@ -148,7 +148,7 @@ public class SparkJobTest {
Assertions.assertEquals(9, tmp.filter(r -> r.getSource().substring(0, 3).equals("50|")).count()); Assertions.assertEquals(9, tmp.filter(r -> r.getSource().substring(0, 3).equals("50|")).count());
tmp tmp
.filter(r -> r.getSource().substring(0, 3).equals("50|")) .filter(r -> r.getSource().substring(0, 3).equals("50|"))
.foreach(r -> Assertions.assertEquals(ModelConstants.HAS_AUTHOR_INSTITUTION, r.getRelClass())); .foreach(r -> Assertions.assertEquals(Relation.RELCLASS.hasAuthorInstitution, r.getRelClass()));
Assertions Assertions
.assertEquals( .assertEquals(
2, tmp.filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); 2, tmp.filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count());
@ -168,7 +168,7 @@ public class SparkJobTest {
Assertions.assertEquals(9, tmp.filter(r -> r.getSource().substring(0, 3).equals("20|")).count()); Assertions.assertEquals(9, tmp.filter(r -> r.getSource().substring(0, 3).equals("20|")).count());
tmp tmp
.filter(r -> r.getSource().substring(0, 3).equals("20|")) .filter(r -> r.getSource().substring(0, 3).equals("20|"))
.foreach(r -> Assertions.assertEquals(ModelConstants.IS_AUTHOR_INSTITUTION_OF, r.getRelClass())); .foreach(r -> Assertions.assertEquals(Relation.RELCLASS.isAuthorInstitutionOf, r.getRelClass()));
Assertions Assertions
.assertEquals( .assertEquals(
1, tmp.filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count()); 1, tmp.filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count());

View File

@ -117,17 +117,17 @@ public class StepActionsTest {
verificationDs verificationDs
.foreach( .foreach(
(ForeachFunction<Relation>) r -> Assertions (ForeachFunction<Relation>) r -> Assertions
.assertEquals(ModelConstants.HAS_AUTHOR_INSTITUTION, r.getRelClass())); .assertEquals(Relation.RELCLASS.hasAuthorInstitution, r.getRelClass()));
verificationDs verificationDs
.foreach( .foreach(
(ForeachFunction<Relation>) r -> Assertions (ForeachFunction<Relation>) r -> Assertions
.assertEquals(ModelConstants.RESULT_ORGANIZATION, r.getRelType())); .assertEquals(Relation.RELTYPE.resultOrganization, r.getRelType()));
verificationDs verificationDs
.foreach( .foreach(
(ForeachFunction<Relation>) r -> Assertions (ForeachFunction<Relation>) r -> Assertions
.assertEquals(ModelConstants.AFFILIATION, r.getSubRelType())); .assertEquals(Relation.SUBRELTYPE.affiliation, r.getSubRelType()));
verificationDs verificationDs
.foreach( .foreach(

View File

@ -89,7 +89,7 @@ public class GetDatasourceFromCountry implements Serializable {
(MapFunction<String, Relation>) value -> OBJECT_MAPPER.readValue(value, Relation.class), (MapFunction<String, Relation>) value -> OBJECT_MAPPER.readValue(value, Relation.class),
Encoders.bean(Relation.class)) Encoders.bean(Relation.class))
.filter( .filter(
(FilterFunction<Relation>) rel -> rel.getRelClass().equalsIgnoreCase(ModelConstants.IS_PROVIDED_BY)); (FilterFunction<Relation>) rel -> rel.getRelClass().equalsIgnoreCase(Relation.RELCLASS.isProvidedBy));
organization organization
.joinWith(relation, organization.col("id").equalTo(relation.col("target"))) .joinWith(relation, organization.col("id").equalTo(relation.col("target")))

View File

@ -1,33 +1,26 @@
package eu.dnetlib.dhp.oa.graph.raw; package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES; import com.google.common.collect.Lists;
import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME; import com.google.common.collect.Sets;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import static eu.dnetlib.dhp.schema.common.ModelConstants.REPOSITORY_PROVENANCE_ACTIONS; import eu.dnetlib.dhp.schema.common.ModelConstants;
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
import static eu.dnetlib.dhp.schema.common.ModelConstants.UNKNOWN;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
import static eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory.*;
import java.util.*;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.oaf.Entity; import eu.dnetlib.dhp.schema.oaf.Entity;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.commons.validator.routines.UrlValidator; import org.apache.commons.validator.routines.UrlValidator;
import org.dom4j.*; import org.dom4j.*;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.google.common.collect.Lists; import java.util.*;
import com.google.common.collect.Sets; import java.util.stream.Collectors;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import eu.dnetlib.dhp.schema.common.ModelConstants; import static eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory.createOpenaireId;
import eu.dnetlib.dhp.schema.oaf.*; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
public abstract class AbstractMdRecordToOafMapper { public abstract class AbstractMdRecordToOafMapper {
@ -261,7 +254,7 @@ public abstract class AbstractMdRecordToOafMapper {
res res
.add( .add(
OafMapperUtils OafMapperUtils
.getRelation(projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, entity, validationdDate)); .getRelation(projectId, docId, Relation.RELTYPE.resultProject, Relation.SUBRELTYPE.outcome, Relation.RELCLASS.produces, entity, validationdDate));
} }
} }
@ -276,12 +269,11 @@ public abstract class AbstractMdRecordToOafMapper {
Element element = (Element) o; Element element = (Element) o;
final String target = StringUtils.trim(element.getText()); final String target = StringUtils.trim(element.getText());
final String relType = element.attributeValue("relType"); final Relation.RELTYPE relType = Relation.RELTYPE.valueOf(element.attributeValue("relType"));
final String subRelType = element.attributeValue("subRelType"); final Relation.SUBRELTYPE subRelType = Relation.SUBRELTYPE.valueOf(element.attributeValue("subRelType"));
final String relClass = element.attributeValue("relClass"); final Relation.RELCLASS relClass = Relation.RELCLASS.lookUp(element.attributeValue("relClass"));
if (StringUtils.isNotBlank(target) && StringUtils.isNotBlank(relType) && StringUtils.isNotBlank(subRelType) if (StringUtils.isNotBlank(target)) {
&& StringUtils.isNotBlank(relClass)) {
final String validationdDate = ((Node) o).valueOf("@validationDate"); final String validationdDate = ((Node) o).valueOf("@validationDate");

View File

@ -90,6 +90,8 @@ object SparkResolveEntities {
case EntityType.dataset => mapper.readValue(input, classOf[OafDataset]) case EntityType.dataset => mapper.readValue(input, classOf[OafDataset])
case EntityType.software => mapper.readValue(input, classOf[Software]) case EntityType.software => mapper.readValue(input, classOf[Software])
case EntityType.otherresearchproduct => mapper.readValue(input, classOf[OtherResearchProduct]) case EntityType.otherresearchproduct => mapper.readValue(input, classOf[OtherResearchProduct])
case _ => throw new IllegalArgumentException(s"Unexpected entity type $entity")
} }
} }

View File

@ -104,24 +104,12 @@ object SparkConvertRDDtoDataset {
log.info("Converting Relation") log.info("Converting Relation")
val relClassFilter = List(
ModelConstants.MERGES,
ModelConstants.IS_MERGED_IN,
ModelConstants.HAS_AMONG_TOP_N_SIMILAR_DOCS,
ModelConstants.IS_AMONG_TOP_N_SIMILAR_DOCS
)
val rddRelation = spark.sparkContext val rddRelation = spark.sparkContext
.textFile(s"$sourcePath/relation") .textFile(s"$sourcePath/relation")
.map(s => mapper.readValue(s, classOf[Relation])) .map(s => mapper.readValue(s, classOf[Relation]))
.filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50")) .filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50"))
.filter(r => filterRelations(r)) .filter(r => filterRelations(r))
//filter OpenCitations relations
// .filter(r =>
// r.getDataInfo.getProvenanceaction != null &&
// !"sysimport:crosswalk:opencitations".equals(r.getDataInfo.getProvenanceaction.getClassid)
// )
spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath") spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath")
} }
@ -133,12 +121,12 @@ object SparkConvertRDDtoDataset {
*/ */
val relClassFilter = List( val relClassFilter = List(
ModelConstants.MERGES, Relation.RELCLASS.merges,
ModelConstants.IS_MERGED_IN, Relation.RELCLASS.isMergedIn,
ModelConstants.HAS_AMONG_TOP_N_SIMILAR_DOCS, Relation.RELCLASS.HasAmongTopNSimilarDocuments,
ModelConstants.IS_AMONG_TOP_N_SIMILAR_DOCS Relation.RELCLASS.IsAmongTopNSimilarDocuments
) )
if (relClassFilter.exists(k => k.equalsIgnoreCase(r.getRelClass))) if (relClassFilter.contains(r.getRelClass))
false false
else { else {
if (r.getProvenance == null || r.getProvenance.isEmpty) if (r.getProvenance == null || r.getProvenance.isEmpty)

View File

@ -112,7 +112,6 @@ object SparkCreateInputGraph {
log.info(s"Extract ${clazz.getSimpleName}") log.info(s"Extract ${clazz.getSimpleName}")
oafDs oafDs
.filter(o => o.isInstanceOf[T]) .filter(o => o.isInstanceOf[T])
.map(p => p.asInstanceOf[T])
.write .write
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.save(targetPath) .save(targetPath)

View File

@ -51,7 +51,7 @@ object SparkCreateScholix {
val relationDS: Dataset[(String, Relation)] = spark.read val relationDS: Dataset[(String, Relation)] = spark.read
.load(relationPath) .load(relationPath)
.as[Relation] .as[Relation]
.filter(r => !r.getRelClass.toLowerCase.contains("merge")) .filter(r => !r.getRelClass.toString.toLowerCase.contains("merge"))
.map(r => (r.getSource, r))(Encoders.tuple(Encoders.STRING, relEncoder)) .map(r => (r.getSource, r))(Encoders.tuple(Encoders.STRING, relEncoder))
val summaryDS: Dataset[(String, ScholixSummary)] = spark.read val summaryDS: Dataset[(String, ScholixSummary)] = spark.read

View File

@ -1,17 +1,13 @@
package eu.dnetlib.dhp.oa.graph.raw; package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions.cleanup; import com.fasterxml.jackson.databind.ObjectMapper;
import static eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions.fixVocabularyNames; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import static org.junit.jupiter.api.Assertions.*; import eu.dnetlib.dhp.schema.common.ModelConstants;
import static org.mockito.Mockito.lenient; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import java.io.IOException; import eu.dnetlib.dhp.schema.oaf.utils.PidType;
import java.util.List; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.dom4j.DocumentException; import org.dom4j.DocumentException;
@ -21,14 +17,16 @@ import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock; import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension; import org.mockito.junit.jupiter.MockitoExtension;
import com.fasterxml.jackson.databind.ObjectMapper; import java.io.IOException;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import static eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions.cleanup;
import eu.dnetlib.dhp.schema.common.ModelConstants; import static eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions.fixVocabularyNames;
import eu.dnetlib.dhp.schema.oaf.*; import static org.junit.jupiter.api.Assertions.*;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import static org.mockito.Mockito.lenient;
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ExtendWith(MockitoExtension.class) @ExtendWith(MockitoExtension.class)
class MappersTest { class MappersTest {
@ -129,7 +127,7 @@ class MappersTest {
.stream() .stream()
.filter(o -> o instanceof Relation) .filter(o -> o instanceof Relation)
.map(o -> (Relation) o) .map(o -> (Relation) o)
.filter(r -> ModelConstants.RESULT_PROJECT.equals(r.getRelType())) .filter(r -> Relation.RELTYPE.resultProject.equals(r.getRelType()))
.collect(Collectors.toList()); .collect(Collectors.toList());
assertEquals(2, resultProject.size()); assertEquals(2, resultProject.size());
@ -152,7 +150,7 @@ class MappersTest {
.stream() .stream()
.filter(o -> o instanceof Relation) .filter(o -> o instanceof Relation)
.map(o -> (Relation) o) .map(o -> (Relation) o)
.filter(r -> ModelConstants.RESULT_ORGANIZATION.equals(r.getRelType())) .filter(r -> Relation.RELTYPE.resultOrganization.equals(r.getRelType()))
.collect(Collectors.toList()); .collect(Collectors.toList());
assertEquals(2, affiliation.size()); assertEquals(2, affiliation.size());
@ -297,17 +295,17 @@ class MappersTest {
assertEquals(d.getId(), r1.getSource()); assertEquals(d.getId(), r1.getSource());
assertEquals("40|corda_______::e06332dee33bec6c2ba4c98601053229", r1.getTarget()); assertEquals("40|corda_______::e06332dee33bec6c2ba4c98601053229", r1.getTarget());
assertEquals(ModelConstants.RESULT_PROJECT, r1.getRelType()); assertEquals(Relation.RELTYPE.resultProject, r1.getRelType());
assertEquals(ModelConstants.OUTCOME, r1.getSubRelType()); assertEquals(Relation.SUBRELTYPE.outcome, r1.getSubRelType());
assertEquals(ModelConstants.IS_PRODUCED_BY, r1.getRelClass()); assertEquals(Relation.RELCLASS.isProducedBy, r1.getRelClass());
assertTrue(r1.getValidated()); assertTrue(r1.getValidated());
assertEquals("2020-01-01", r1.getValidationDate()); assertEquals("2020-01-01", r1.getValidationDate());
assertEquals(d.getId(), r2.getTarget()); assertEquals(d.getId(), r2.getTarget());
assertEquals("40|corda_______::e06332dee33bec6c2ba4c98601053229", r2.getSource()); assertEquals("40|corda_______::e06332dee33bec6c2ba4c98601053229", r2.getSource());
assertEquals(ModelConstants.RESULT_PROJECT, r2.getRelType()); assertEquals(Relation.RELTYPE.resultProject, r2.getRelType());
assertEquals(ModelConstants.OUTCOME, r2.getSubRelType()); assertEquals(Relation.SUBRELTYPE.outcome, r2.getSubRelType());
assertEquals(ModelConstants.PRODUCES, r2.getRelClass()); assertEquals(Relation.RELCLASS.produces, r2.getRelClass());
assertTrue(r2.getValidated()); assertTrue(r2.getValidated());
assertEquals("2020-01-01", r2.getValidationDate()); assertEquals("2020-01-01", r2.getValidationDate());
@ -597,15 +595,15 @@ class MappersTest {
assertEquals(s.getId(), r1.getSource()); assertEquals(s.getId(), r1.getSource());
assertEquals("50|doi_________::b453e7b4b2130ace57ff0c3db470a982", r1.getTarget()); assertEquals("50|doi_________::b453e7b4b2130ace57ff0c3db470a982", r1.getTarget());
assertEquals(ModelConstants.RESULT_RESULT, r1.getRelType()); assertEquals(Relation.RELTYPE.resultResult, r1.getRelType());
assertEquals(ModelConstants.RELATIONSHIP, r1.getSubRelType()); assertEquals(Relation.SUBRELTYPE.relationship, r1.getSubRelType());
assertEquals(ModelConstants.IS_REFERENCED_BY, r1.getRelClass()); assertEquals(Relation.RELCLASS.IsReferencedBy, r1.getRelClass());
assertEquals(s.getId(), r2.getTarget()); assertEquals(s.getId(), r2.getTarget());
assertEquals("50|doi_________::b453e7b4b2130ace57ff0c3db470a982", r2.getSource()); assertEquals("50|doi_________::b453e7b4b2130ace57ff0c3db470a982", r2.getSource());
assertEquals(ModelConstants.RESULT_RESULT, r2.getRelType()); assertEquals(Relation.RELTYPE.resultResult, r2.getRelType());
assertEquals(ModelConstants.RELATIONSHIP, r2.getSubRelType()); assertEquals(Relation.SUBRELTYPE.relationship, r2.getSubRelType());
assertEquals(ModelConstants.REFERENCES, r2.getRelClass()); assertEquals(Relation.RELCLASS.References, r2.getRelClass());
} }

View File

@ -246,10 +246,10 @@ class MigrateDbEntitiesApplicationTest {
assertEquals(ModelConstants.DATASOURCE_ORGANIZATION, r1.getRelType()); assertEquals(ModelConstants.DATASOURCE_ORGANIZATION, r1.getRelType());
assertEquals(ModelConstants.DATASOURCE_ORGANIZATION, r2.getRelType()); assertEquals(ModelConstants.DATASOURCE_ORGANIZATION, r2.getRelType());
assertEquals(ModelConstants.PROVISION, r1.getSubRelType()); assertEquals(Relation.SUBRELTYPE.provision, r1.getSubRelType());
assertEquals(ModelConstants.PROVISION, r2.getSubRelType()); assertEquals(Relation.SUBRELTYPE.provision, r2.getSubRelType());
assertEquals(ModelConstants.IS_PROVIDED_BY, r1.getRelClass()); assertEquals(Relation.RELCLASS.isProvidedBy, r1.getRelClass());
assertEquals(ModelConstants.PROVIDES, r2.getRelClass()); assertEquals(ModelConstants.PROVIDES, r2.getRelClass());
} }
@ -272,7 +272,7 @@ class MigrateDbEntitiesApplicationTest {
assertValidId(rel.getProvenance().get(0).getCollectedfrom().getKey()); assertValidId(rel.getProvenance().get(0).getCollectedfrom().getKey());
assertEquals(ModelConstants.PROJECT_ORGANIZATION, rel.getRelType()); assertEquals(ModelConstants.PROJECT_ORGANIZATION, rel.getRelType());
assertEquals(ModelConstants.PARTICIPATION, rel.getSubRelType()); assertEquals(Relation.SUBRELTYPE.participation, rel.getSubRelType());
assertEquals(ModelConstants.IS_PARTICIPANT, rel.getRelClass()); assertEquals(ModelConstants.IS_PARTICIPANT, rel.getRelClass());
assertNotNull(rel.getProperties()); assertNotNull(rel.getProperties());

View File

@ -3,11 +3,9 @@ package eu.dnetlib.dhp.oa.provision;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.HashSet; import java.util.*;
import java.util.Optional;
import java.util.PriorityQueue;
import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
@ -80,10 +78,15 @@ public class PrepareRelationsJob {
.orElse(DEFAULT_NUM_PARTITIONS); .orElse(DEFAULT_NUM_PARTITIONS);
log.info("relPartitions: {}", relPartitions); log.info("relPartitions: {}", relPartitions);
Set<String> relationFilter = Optional Set<Relation.RELCLASS> relationFilter = Optional
.ofNullable(parser.get("relationFilter")) .ofNullable(parser.get("relationFilter"))
.map(String::toLowerCase) .map(String::toLowerCase)
.map(s -> Sets.newHashSet(Splitter.on(",").split(s))) .map(s -> Sets.newHashSet(
StreamSupport.stream(
Splitter.on(",").split(s).spliterator(), false)
.map(Relation.RELCLASS::valueOf)
.collect(Collectors.toList())
) )
.orElse(new HashSet<>()); .orElse(new HashSet<>());
log.info("relationFilter: {}", relationFilter); log.info("relationFilter: {}", relationFilter);
@ -128,11 +131,11 @@ public class PrepareRelationsJob {
* @param relPartitions number of partitions for the output RDD * @param relPartitions number of partitions for the output RDD
*/ */
private static void prepareRelationsRDD(SparkSession spark, String inputRelationsPath, String outputPath, private static void prepareRelationsRDD(SparkSession spark, String inputRelationsPath, String outputPath,
Set<String> relationFilter, int sourceMaxRelations, int targetMaxRelations, int relPartitions) { Set<Relation.RELCLASS> relationFilter, int sourceMaxRelations, int targetMaxRelations, int relPartitions) {
JavaRDD<Relation> rels = readPathRelationRDD(spark, inputRelationsPath) JavaRDD<Relation> rels = readPathRelationRDD(spark, inputRelationsPath)
.filter(rel -> !(rel.getSource().startsWith("unresolved") || rel.getTarget().startsWith("unresolved"))) .filter(rel -> !(rel.getSource().startsWith("unresolved") || rel.getTarget().startsWith("unresolved")))
.filter(rel -> !relationFilter.contains(StringUtils.lowerCase(rel.getRelClass()))); .filter(rel -> !relationFilter.contains(rel.getRelClass()));
JavaRDD<Relation> pruned = pruneRels( JavaRDD<Relation> pruned = pruneRels(
pruneRels( pruneRels(

View File

@ -13,21 +13,23 @@ import eu.dnetlib.dhp.schema.oaf.Relation;
public class RelationComparator implements Comparator<Relation> { public class RelationComparator implements Comparator<Relation> {
private static final Map<String, Integer> weights = Maps.newHashMap(); private static final Map<Relation.SUBRELTYPE, Integer> weights = Maps.newHashMap();
static { static {
weights.put(ModelConstants.OUTCOME, 0); weights.put(Relation.SUBRELTYPE.outcome, 0);
weights.put(ModelConstants.SUPPLEMENT, 1); weights.put(Relation.SUBRELTYPE.supplement, 1);
weights.put(ModelConstants.REVIEW, 2); weights.put(Relation.SUBRELTYPE.review, 2);
weights.put(ModelConstants.CITATION, 3); weights.put(Relation.SUBRELTYPE.citation, 3);
weights.put(ModelConstants.AFFILIATION, 4); weights.put(Relation.SUBRELTYPE.affiliation, 4);
weights.put(ModelConstants.RELATIONSHIP, 5); //TODO CLAUDIO PLEASE CHECK IF the SUBSTITUTION OF publicationDataset WITH RELATIONSHIPS IS OK
weights.put(ModelConstants.PUBLICATION_DATASET, 6); // weights.put(Relation.SUBRELTYPE.relationship, 5);
weights.put(ModelConstants.SIMILARITY, 7); weights.put(Relation.SUBRELTYPE.relationship, 6);
weights.put(Relation.SUBRELTYPE.similarity, 7);
weights.put(ModelConstants.PROVISION, 8); weights.put(Relation.SUBRELTYPE.provision, 8);
weights.put(ModelConstants.PARTICIPATION, 9); weights.put(Relation.SUBRELTYPE.participation, 9);
weights.put(ModelConstants.DEDUP, 10); weights.put(Relation.SUBRELTYPE.dedup, 10);
} }
private Integer getWeight(Relation o) { private Integer getWeight(Relation o) {

View File

@ -14,23 +14,24 @@ import eu.dnetlib.dhp.schema.oaf.Relation;
public class SortableRelation extends Relation implements Comparable<SortableRelation>, Serializable { public class SortableRelation extends Relation implements Comparable<SortableRelation>, Serializable {
private static final Map<String, Integer> weights = Maps.newHashMap(); private static final Map<Relation.SUBRELTYPE, Integer> weights = Maps.newHashMap();
// TODO: version and part are missing from these weights; also, why is publication listed here?
static { static {
weights.put(ModelConstants.OUTCOME, 0); weights.put(Relation.SUBRELTYPE.outcome, 0);
weights.put(ModelConstants.SUPPLEMENT, 1); weights.put(Relation.SUBRELTYPE.supplement, 1);
weights.put(ModelConstants.REVIEW, 2); weights.put(Relation.SUBRELTYPE.review, 2);
weights.put(ModelConstants.CITATION, 3); weights.put(Relation.SUBRELTYPE.citation, 3);
weights.put(ModelConstants.AFFILIATION, 4); weights.put(Relation.SUBRELTYPE.affiliation, 4);
weights.put(ModelConstants.RELATIONSHIP, 5); weights.put(Relation.SUBRELTYPE.relationship, 5);
weights.put(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID, 6); //weights.put(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID, 6);
weights.put(ModelConstants.SIMILARITY, 7); weights.put(Relation.SUBRELTYPE.similarity, 7);
weights.put(ModelConstants.PROVISION, 8); weights.put(Relation.SUBRELTYPE.provision, 8);
weights.put(ModelConstants.PARTICIPATION, 9); weights.put(Relation.SUBRELTYPE.participation, 9);
weights.put(ModelConstants.DEDUP, 10); weights.put(Relation.SUBRELTYPE.dedup, 10);
} }
private static final long serialVersionUID = 34753984579L; private static final long serialVersionUID = 34753984579L;
private String groupingKey; private String groupingKey;

View File

@ -17,19 +17,19 @@ public class SortableRelationKey implements Comparable<SortableRelationKey>, Ser
// Integer weights per relation sub-type, filled once by the static initializer below.
// NOTE(review): the map is still declared with String keys, while most entries now pass
// Relation.SUBRELTYPE constants and three entries (PUBLICATION_DATASET, PART, VERSION) still pass
// ModelConstants strings. Presumably the key type should become Relation.SUBRELTYPE — confirm that
// matching enum constants exist for the three remaining entries before switching.
private static final Map<String, Integer> weights = Maps.newHashMap(); private static final Map<String, Integer> weights = Maps.newHashMap();
static { static {
weights.put(ModelConstants.PARTICIPATION, 0); weights.put(Relation.SUBRELTYPE.participation, 0);
weights.put(ModelConstants.OUTCOME, 1); weights.put(Relation.SUBRELTYPE.outcome, 1);
weights.put(ModelConstants.AFFILIATION, 2); weights.put(Relation.SUBRELTYPE.affiliation, 2);
weights.put(ModelConstants.DEDUP, 3); weights.put(Relation.SUBRELTYPE.dedup, 3);
weights.put(ModelConstants.PUBLICATION_DATASET, 4); weights.put(ModelConstants.PUBLICATION_DATASET, 4);
weights.put(ModelConstants.SUPPLEMENT, 5); weights.put(Relation.SUBRELTYPE.supplement, 5);
weights.put(ModelConstants.REVIEW, 6); weights.put(Relation.SUBRELTYPE.review, 6);
weights.put(ModelConstants.RELATIONSHIP, 7); weights.put(Relation.SUBRELTYPE.relationship, 7);
weights.put(ModelConstants.PART, 8); weights.put(ModelConstants.PART, 8);
weights.put(ModelConstants.PROVISION, 9); weights.put(Relation.SUBRELTYPE.provision, 9);
weights.put(ModelConstants.VERSION, 10); weights.put(ModelConstants.VERSION, 10);
weights.put(ModelConstants.SIMILARITY, 11); weights.put(Relation.SUBRELTYPE.similarity, 11);
weights.put(ModelConstants.CITATION, 12); weights.put(Relation.SUBRELTYPE.citation, 12);
} }
private static final long serialVersionUID = 3232323; private static final long serialVersionUID = 3232323;

View File

@ -1459,7 +1459,7 @@ public class XmlRecordFactory implements Serializable {
} }
private boolean isDuplicate(final RelatedEntityWrapper link) { private boolean isDuplicate(final RelatedEntityWrapper link) {
return ModelConstants.DEDUP.equalsIgnoreCase(link.getRelation().getSubRelType()); return Relation.SUBRELTYPE.dedup.equalsIgnoreCase(link.getRelation().getSubRelType());
} }
private List<String> listExtraInfo(final Entity entity) { private List<String> listExtraInfo(final Entity entity) {