- Removed ModelConstants and updated Relation enum
This commit is contained in:
parent
f8f4b9a018
commit
88fffa6dbd
|
@ -161,9 +161,9 @@ public class CreateActionSetSparkJob implements Serializable {
|
|||
r.setProvenance(PROVENANCE);
|
||||
r.setSource(source);
|
||||
r.setTarget(target);
|
||||
r.setRelType(ModelConstants.RESULT_RESULT);
|
||||
r.setSubRelType(ModelConstants.CITATION);
|
||||
r.setRelClass(ModelConstants.CITES);
|
||||
r.setRelType(Relation.RELTYPE.resultResult);
|
||||
r.setSubRelType(Relation.SUBRELTYPE.citation);
|
||||
r.setRelClass(Relation.RELCLASS.Cites);
|
||||
return r;
|
||||
}
|
||||
|
||||
|
|
|
@ -648,11 +648,11 @@ object DataciteToOAFTransformation {
|
|||
(r.relationType.toLowerCase.contains("cite") || r.relationType.toLowerCase.contains("reference"))
|
||||
)
|
||||
.map(r => {
|
||||
r.relationType match {
|
||||
case Relation.RELCLASS.Cites.toString | Relation.RELCLASS.References.toString =>
|
||||
Relation.RELCLASS.valueOf(r.relationType) match {
|
||||
case Relation.RELCLASS.Cites | Relation.RELCLASS.References =>
|
||||
val target = DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType)
|
||||
relation(id, target, Relation.SUBRELTYPE.citation, Relation.RELCLASS.Cites, date)
|
||||
case Relation.RELCLASS.IsCitedBy.toString | Relation.RELCLASS.IsReferencedBy.toString =>
|
||||
case Relation.RELCLASS.IsCitedBy | Relation.RELCLASS.IsReferencedBy =>
|
||||
val source = DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType)
|
||||
relation(source, id, Relation.SUBRELTYPE.citation, Relation.RELCLASS.Cites, date)
|
||||
}
|
||||
|
|
|
@ -2,13 +2,13 @@ package eu.dnetlib.dhp.sx.bio
|
|||
|
||||
import com.google.common.collect.Lists
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils}
|
||||
import eu.dnetlib.dhp.schema.oaf._
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils}
|
||||
import org.json4s.DefaultFormats
|
||||
import org.json4s.JsonAST.{JField, JObject, JString}
|
||||
import org.json4s.jackson.JsonMethods.{compact, parse, render}
|
||||
|
||||
import collection.JavaConverters._
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
object BioDBToOAF {
|
||||
|
||||
|
@ -127,8 +127,8 @@ object BioDBToOAF {
|
|||
target_pid_type,
|
||||
generate_unresolved_id(source_pid, source_pid_type),
|
||||
collectedFromMap("elsevier"),
|
||||
"relationship",
|
||||
relation_semantic,
|
||||
Relation.SUBRELTYPE.relationship,
|
||||
Relation.RELCLASS.lookUp(relation_semantic),
|
||||
date
|
||||
)
|
||||
|
||||
|
@ -323,8 +323,8 @@ object BioDBToOAF {
|
|||
"pmid",
|
||||
d.getId,
|
||||
collectedFromMap("uniprot"),
|
||||
ModelConstants.RELATIONSHIP,
|
||||
ModelConstants.IS_RELATED_TO,
|
||||
Relation.SUBRELTYPE.relationship,
|
||||
Relation.RELCLASS.IsRelatedTo,
|
||||
if (i_date.isDefined) i_date.get.date else null
|
||||
)
|
||||
rel.getProvenance.asScala.map(p => p.getCollectedfrom)
|
||||
|
@ -335,8 +335,8 @@ object BioDBToOAF {
|
|||
"doi",
|
||||
d.getId,
|
||||
collectedFromMap("uniprot"),
|
||||
ModelConstants.RELATIONSHIP,
|
||||
ModelConstants.IS_RELATED_TO,
|
||||
Relation.SUBRELTYPE.relationship,
|
||||
Relation.RELCLASS.IsRelatedTo,
|
||||
if (i_date.isDefined) i_date.get.date else null
|
||||
)
|
||||
List(d, rel)
|
||||
|
@ -353,8 +353,8 @@ object BioDBToOAF {
|
|||
pidType: String,
|
||||
sourceId: String,
|
||||
collectedFrom: KeyValue,
|
||||
subRelType: String,
|
||||
relClass: String,
|
||||
subRelType: Relation.SUBRELTYPE,
|
||||
relClass: Relation.RELCLASS,
|
||||
date: String
|
||||
): Relation = {
|
||||
|
||||
|
@ -370,7 +370,7 @@ object BioDBToOAF {
|
|||
|
||||
rel.setProvenance(provenance)
|
||||
|
||||
rel.setRelType(ModelConstants.RESULT_RESULT)
|
||||
rel.setRelType(Relation.RELTYPE.resultResult)
|
||||
rel.setSubRelType(subRelType)
|
||||
rel.setRelClass(relClass)
|
||||
|
||||
|
@ -398,10 +398,11 @@ object BioDBToOAF {
|
|||
pidType,
|
||||
sourceId,
|
||||
collectedFrom,
|
||||
ModelConstants.SUPPLEMENT,
|
||||
ModelConstants.IS_SUPPLEMENT_TO,
|
||||
Relation.SUBRELTYPE.supplement,
|
||||
Relation.RELCLASS.IsSupplementTo,
|
||||
date
|
||||
)
|
||||
|
||||
}
|
||||
|
||||
def pdbTOOaf(input: String): List[Oaf] = {
|
||||
|
@ -573,8 +574,8 @@ object BioDBToOAF {
|
|||
"pmid",
|
||||
d.getId,
|
||||
collectedFromMap("ebi"),
|
||||
ModelConstants.RELATIONSHIP,
|
||||
ModelConstants.IS_RELATED_TO,
|
||||
Relation.SUBRELTYPE.relationship,
|
||||
Relation.RELCLASS.IsRelatedTo,
|
||||
GraphCleaningFunctions.cleanDate(input.date)
|
||||
)
|
||||
)
|
||||
|
|
|
@ -326,7 +326,7 @@ public class CreateOpenCitationsASTest {
|
|||
});
|
||||
|
||||
assertEquals(5, check.filter(r -> r.getSource().equals(doi1)).count());
|
||||
check.filter(r -> r.getSource().equals(doi1)).foreach(r -> assertEquals(ModelConstants.CITES, r.getRelClass()));
|
||||
check.filter(r -> r.getSource().equals(doi1)).foreach(r -> assertEquals(Relation.RELCLASS.Cites, r.getRelClass()));
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -65,7 +65,7 @@ public class PrepareGroupsJob {
|
|||
|
||||
final Dataset<Relation> mergedRels = ClusterUtils
|
||||
.loadRelations(graphPath, spark)
|
||||
.filter((FilterFunction<Relation>) r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS));
|
||||
.filter((FilterFunction<Relation>) r -> r.getRelClass().equals(Relation.RELCLASS.isMergedIn));
|
||||
|
||||
final TypedColumn<Tuple2<OaBrokerMainEntity, Relation>, ResultGroup> aggr = new ResultAggregator()
|
||||
.toColumn();
|
||||
|
|
|
@ -68,7 +68,7 @@ public class PrepareRelatedDatasetsJob {
|
|||
|
||||
final Dataset<Relation> rels = ClusterUtils
|
||||
.loadRelations(graphPath, spark)
|
||||
.filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
|
||||
.filter((FilterFunction<Relation>) r -> r.getRelType().equals(Relation.RELTYPE.resultResult))
|
||||
.filter((FilterFunction<Relation>) r -> ClusterUtils.isValidResultResultClass(r.getRelClass()))
|
||||
.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource()))
|
||||
.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getTarget()));
|
||||
|
@ -78,7 +78,7 @@ public class PrepareRelatedDatasetsJob {
|
|||
.map((MapFunction<Tuple2<Relation, OaBrokerRelatedDataset>, RelatedDataset>) t -> {
|
||||
final RelatedDataset rel = new RelatedDataset(t._1.getSource(),
|
||||
t._2);
|
||||
rel.getRelDataset().setRelType(t._1.getRelClass());
|
||||
rel.getRelDataset().setRelType(t._1.getRelClass().toString());
|
||||
return rel;
|
||||
}, Encoders.bean(RelatedDataset.class));
|
||||
|
||||
|
|
|
@ -68,10 +68,11 @@ public class PrepareRelatedProjectsJob {
|
|||
(MapFunction<Project, OaBrokerProject>) ConversionUtils::oafProjectToBrokerProject,
|
||||
Encoders.bean(OaBrokerProject.class));
|
||||
|
||||
|
||||
final Dataset<Relation> rels = ClusterUtils
|
||||
.loadRelations(graphPath, spark)
|
||||
.filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_PROJECT))
|
||||
.filter((FilterFunction<Relation>) r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS))
|
||||
.filter((FilterFunction<Relation>) r -> r.getRelType().equals(Relation.RELTYPE.resultProject))
|
||||
.filter((FilterFunction<Relation>) r -> !r.getRelClass().equals(Relation.RELCLASS.isMergedIn))
|
||||
.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource()))
|
||||
.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getTarget()));
|
||||
|
||||
|
|
|
@ -69,7 +69,7 @@ public class PrepareRelatedPublicationsJob {
|
|||
|
||||
final Dataset<Relation> rels = ClusterUtils
|
||||
.loadRelations(graphPath, spark)
|
||||
.filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
|
||||
.filter((FilterFunction<Relation>) r -> r.getRelType().equals(Relation.RELTYPE.resultResult))
|
||||
.filter((FilterFunction<Relation>) r -> ClusterUtils.isValidResultResultClass(r.getRelClass()))
|
||||
.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource()))
|
||||
.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getTarget()));
|
||||
|
@ -79,7 +79,7 @@ public class PrepareRelatedPublicationsJob {
|
|||
.map((MapFunction<Tuple2<Relation, OaBrokerRelatedPublication>, RelatedPublication>) t -> {
|
||||
final RelatedPublication rel = new RelatedPublication(
|
||||
t._1.getSource(), t._2);
|
||||
rel.getRelPublication().setRelType(t._1.getRelClass());
|
||||
rel.getRelPublication().setRelType(t._1.getRelClass().toString());
|
||||
return rel;
|
||||
}, Encoders.bean(RelatedPublication.class));
|
||||
|
||||
|
|
|
@ -73,8 +73,8 @@ public class PrepareRelatedSoftwaresJob {
|
|||
final Dataset<Relation> rels;
|
||||
rels = ClusterUtils
|
||||
.loadRelations(graphPath, spark)
|
||||
.filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
|
||||
.filter((FilterFunction<Relation>) r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS))
|
||||
.filter((FilterFunction<Relation>) r -> r.getRelType().equals(Relation.RELTYPE.resultResult))
|
||||
.filter((FilterFunction<Relation>) r -> !r.getRelClass().equals(Relation.RELCLASS.isMergedIn))
|
||||
.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource()))
|
||||
.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getTarget()));
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
||||
public class EnrichMissingDatasetIsReferencedBy extends AbstractEnrichMissingDataset {
|
||||
|
||||
|
@ -12,7 +12,8 @@ public class EnrichMissingDatasetIsReferencedBy extends AbstractEnrichMissingDat
|
|||
|
||||
@Override
|
||||
protected boolean filterByType(final String relType) {
|
||||
return relType.equals(ModelConstants.IS_REFERENCED_BY);
|
||||
|
||||
return relType.equals(Relation.RELCLASS.IsReferencedBy);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
||||
public class EnrichMissingDatasetIsRelatedTo extends AbstractEnrichMissingDataset {
|
||||
|
||||
|
@ -12,7 +12,8 @@ public class EnrichMissingDatasetIsRelatedTo extends AbstractEnrichMissingDatase
|
|||
|
||||
@Override
|
||||
protected boolean filterByType(final String relType) {
|
||||
return relType.equals(ModelConstants.IS_RELATED_TO);
|
||||
|
||||
return relType.equals(Relation.RELCLASS.IsRelatedTo);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
||||
public class EnrichMissingDatasetIsSupplementedBy extends AbstractEnrichMissingDataset {
|
||||
|
||||
|
@ -12,7 +12,7 @@ public class EnrichMissingDatasetIsSupplementedBy extends AbstractEnrichMissingD
|
|||
|
||||
@Override
|
||||
protected boolean filterByType(final String relType) {
|
||||
return relType.equals(ModelConstants.IS_SUPPLEMENTED_BY);
|
||||
return relType.equals(Relation.RELCLASS.IsSupplementedBy);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
||||
public class EnrichMissingDatasetIsSupplementedTo extends AbstractEnrichMissingDataset {
|
||||
|
||||
|
@ -12,7 +12,8 @@ public class EnrichMissingDatasetIsSupplementedTo extends AbstractEnrichMissingD
|
|||
|
||||
@Override
|
||||
protected boolean filterByType(final String relType) {
|
||||
return relType.equals(ModelConstants.IS_SUPPLEMENT_TO);
|
||||
|
||||
return relType.equals(Relation.RELCLASS.IsSupplementTo);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
||||
public class EnrichMissingDatasetReferences extends AbstractEnrichMissingDataset {
|
||||
|
||||
|
@ -12,7 +12,9 @@ public class EnrichMissingDatasetReferences extends AbstractEnrichMissingDataset
|
|||
|
||||
@Override
|
||||
protected boolean filterByType(final String relType) {
|
||||
return relType.equals(ModelConstants.REFERENCES);
|
||||
|
||||
|
||||
return relType.equals(Relation.RELCLASS.References);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
||||
public class EnrichMissingPublicationIsReferencedBy extends AbstractEnrichMissingPublication {
|
||||
|
||||
|
@ -12,6 +12,6 @@ public class EnrichMissingPublicationIsReferencedBy extends AbstractEnrichMissin
|
|||
|
||||
@Override
|
||||
protected boolean filterByType(final String relType) {
|
||||
return relType.equals(ModelConstants.IS_REFERENCED_BY);
|
||||
return relType.equals(Relation.RELCLASS.IsReferencedBy);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
||||
public class EnrichMissingPublicationIsRelatedTo extends AbstractEnrichMissingPublication {
|
||||
|
||||
|
@ -12,7 +12,7 @@ public class EnrichMissingPublicationIsRelatedTo extends AbstractEnrichMissingPu
|
|||
|
||||
@Override
|
||||
protected boolean filterByType(final String relType) {
|
||||
return relType.equals(ModelConstants.IS_RELATED_TO);
|
||||
return relType.equals(Relation.RELCLASS.IsRelatedTo);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
||||
public class EnrichMissingPublicationIsSupplementedBy extends AbstractEnrichMissingPublication {
|
||||
|
||||
|
@ -12,6 +12,6 @@ public class EnrichMissingPublicationIsSupplementedBy extends AbstractEnrichMiss
|
|||
|
||||
@Override
|
||||
protected boolean filterByType(final String relType) {
|
||||
return relType.equals(ModelConstants.IS_SUPPLEMENTED_BY);
|
||||
return relType.equals(Relation.RELCLASS.IsSupplementedBy);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
||||
public class EnrichMissingPublicationIsSupplementedTo extends AbstractEnrichMissingPublication {
|
||||
|
||||
|
@ -12,7 +12,7 @@ public class EnrichMissingPublicationIsSupplementedTo extends AbstractEnrichMiss
|
|||
|
||||
@Override
|
||||
protected boolean filterByType(final String relType) {
|
||||
return relType.equals(ModelConstants.IS_SUPPLEMENT_TO);
|
||||
return relType.equals(Relation.RELCLASS.IsSupplementTo);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
||||
public class EnrichMissingPublicationReferences extends AbstractEnrichMissingPublication {
|
||||
|
||||
|
@ -12,7 +12,7 @@ public class EnrichMissingPublicationReferences extends AbstractEnrichMissingPub
|
|||
|
||||
@Override
|
||||
protected boolean filterByType(final String relType) {
|
||||
return relType.equals(ModelConstants.REFERENCES);
|
||||
return relType.equals(Relation.RELCLASS.References);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -16,7 +16,6 @@ public class BrokerConstants {
|
|||
}
|
||||
|
||||
public static final String OPEN_ACCESS = "OPEN";
|
||||
public static final String IS_MERGED_IN_CLASS = ModelConstants.IS_MERGED_IN;
|
||||
|
||||
public static final String COLLECTED_FROM_REL = "collectedFrom";
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.util;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
|
@ -13,12 +13,9 @@ import org.apache.spark.sql.SaveMode;
|
|||
import org.apache.spark.sql.SparkSession;
|
||||
import org.apache.spark.util.LongAccumulator;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
public class ClusterUtils {
|
||||
|
||||
|
@ -59,12 +56,12 @@ public class ClusterUtils {
|
|||
return id.contains("dedup");
|
||||
}
|
||||
|
||||
public static final boolean isValidResultResultClass(final String s) {
|
||||
return s.equals(ModelConstants.IS_REFERENCED_BY)
|
||||
|| s.equals(ModelConstants.IS_RELATED_TO)
|
||||
|| s.equals(ModelConstants.REFERENCES)
|
||||
|| s.equals(ModelConstants.IS_SUPPLEMENTED_BY)
|
||||
|| s.equals(ModelConstants.IS_SUPPLEMENT_TO);
|
||||
public static final boolean isValidResultResultClass(final Relation.RELCLASS r) {
|
||||
return r.equals(Relation.RELCLASS.IsReferencedBy)
|
||||
|| r.equals(Relation.RELCLASS.References)
|
||||
|| r.equals(Relation.RELCLASS.IsRelatedTo)
|
||||
|| r.equals(Relation.RELCLASS.IsSupplementTo)
|
||||
|| r.equals(Relation.RELCLASS.IsSupplementedBy);
|
||||
}
|
||||
|
||||
public static <T> T incrementAccumulator(final T o, final LongAccumulator acc) {
|
||||
|
|
|
@ -100,7 +100,7 @@ public class ConversionUtils {
|
|||
|
||||
res.setOpenaireId(cleanOpenaireId(result.getId()));
|
||||
res.setOriginalId(first(result.getOriginalId()));
|
||||
res.setTypology(result.getResulttype());
|
||||
res.setTypology(result.getResulttype().toString());
|
||||
res.setTitles(structPropList(result.getTitle()));
|
||||
res.setAbstracts(result.getDescription());
|
||||
res.setLanguage(classId(result.getLanguage()));
|
||||
|
@ -112,7 +112,7 @@ public class ConversionUtils {
|
|||
res.setContributor(result.getContributor());
|
||||
res
|
||||
.setJournal(
|
||||
result instanceof Publication ? oafJournalToBrokerJournal(((Publication) result).getJournal()) : null);
|
||||
result instanceof Publication ? oafJournalToBrokerJournal(result.getJournal()) : null);
|
||||
res.setPids(allResultPids(result));
|
||||
res.setInstances(flatMappedList(result.getInstance(), ConversionUtils::oafInstanceToBrokerInstances));
|
||||
res
|
||||
|
|
|
@ -153,10 +153,12 @@ abstract class AbstractSparkAction implements Serializable {
|
|||
}
|
||||
|
||||
private boolean isOpenOrgsDedupMergeRelation(Relation rel) {
|
||||
return ModelConstants.ORG_ORG_RELTYPE.equals(rel.getRelType()) &&
|
||||
ModelConstants.DEDUP.equals(rel.getSubRelType())
|
||||
&& (ModelConstants.IS_MERGED_IN.equals(rel.getRelClass()) ||
|
||||
ModelConstants.MERGES.equals(rel.getRelClass()));
|
||||
|
||||
|
||||
return Relation.RELTYPE.organizationOrganization.equals(rel.getRelType()) &&
|
||||
Relation.SUBRELTYPE.dedup.equals(rel.getSubRelType())
|
||||
&& (Relation.RELCLASS.isMergedIn.equals(rel.getRelClass()) ||
|
||||
Relation.RELCLASS.merges.equals(rel.getRelClass()));
|
||||
}
|
||||
|
||||
protected static Boolean parseECField(String field) {
|
||||
|
|
|
@ -157,17 +157,18 @@ public class DedupUtility {
|
|||
|
||||
public static Relation createSimRel(String source, String target, String entity) {
|
||||
final Relation r = new Relation();
|
||||
|
||||
r.setSource(source);
|
||||
r.setTarget(target);
|
||||
r.setSubRelType("dedupSimilarity");
|
||||
r.setRelClass(ModelConstants.IS_SIMILAR_TO);
|
||||
r.setSubRelType(Relation.SUBRELTYPE.dedup);
|
||||
r.setRelClass(Relation.RELCLASS.isSimilarTo);
|
||||
|
||||
switch (entity) {
|
||||
case "result":
|
||||
r.setRelType(ModelConstants.RESULT_RESULT);
|
||||
r.setRelType(Relation.RELTYPE.resultResult);
|
||||
break;
|
||||
case "organization":
|
||||
r.setRelType(ModelConstants.ORG_ORG_RELTYPE);
|
||||
r.setRelType(Relation.RELTYPE.organizationOrganization);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalArgumentException("unmanaged entity type: " + entity);
|
||||
|
|
|
@ -90,9 +90,9 @@ public class SparkCopyOpenorgsMergeRels extends AbstractSparkAction {
|
|||
}
|
||||
|
||||
private boolean isMergeRel(Relation rel) {
|
||||
return (rel.getRelClass().equals(ModelConstants.MERGES)
|
||||
|| rel.getRelClass().equals(ModelConstants.IS_MERGED_IN))
|
||||
&& rel.getRelType().equals(ModelConstants.ORG_ORG_RELTYPE)
|
||||
&& rel.getSubRelType().equals(ModelConstants.DEDUP);
|
||||
return (rel.getRelClass().equals(Relation.RELCLASS.merges)
|
||||
|| rel.getRelClass().equals(Relation.RELCLASS.isMergedIn))
|
||||
&& rel.getRelType().equals(Relation.RELTYPE.organizationOrganization)
|
||||
&& rel.getSubRelType().equals(Relation.SUBRELTYPE.dedup);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -81,9 +81,9 @@ public class SparkCopyOpenorgsSimRels extends AbstractSparkAction {
|
|||
}
|
||||
|
||||
private boolean filterOpenorgsRels(Relation rel) {
|
||||
return rel.getRelClass().equals(ModelConstants.IS_SIMILAR_TO)
|
||||
&& rel.getRelType().equals(ModelConstants.ORG_ORG_RELTYPE)
|
||||
&& rel.getSubRelType().equals(ModelConstants.DEDUP) && isOpenorgs(rel);
|
||||
return rel.getRelClass().equals(Relation.RELCLASS.isSimilarTo)
|
||||
&& rel.getRelType().equals(Relation.RELTYPE.organizationOrganization)
|
||||
&& rel.getSubRelType().equals(Relation.SUBRELTYPE.dedup) && isOpenorgs(rel);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -110,7 +110,7 @@ public class SparkCreateMergeRels extends AbstractSparkAction {
|
|||
.load(DedupUtility.createSimRelPath(workingPath, actionSetId, subEntity))
|
||||
.as(Encoders.bean(Relation.class))
|
||||
.javaRDD()
|
||||
.map(it -> new Edge<>(hash(it.getSource()), hash(it.getTarget()), it.getRelClass()))
|
||||
.map(it -> new Edge<>(hash(it.getSource()), hash(it.getTarget()), it.getRelClass().toString()))
|
||||
.rdd();
|
||||
|
||||
Dataset<Tuple2<String, String>> rawMergeRels = spark
|
||||
|
@ -199,14 +199,15 @@ public class SparkCreateMergeRels extends AbstractSparkAction {
|
|||
id -> {
|
||||
List<Relation> rels = new ArrayList<>();
|
||||
|
||||
rels.add(rel(cc.getCcId(), id, ModelConstants.MERGES, dedupConf));
|
||||
rels.add(rel(cc.getCcId(), id, Relation.RELCLASS.merges, dedupConf));
|
||||
|
||||
return rels.stream();
|
||||
})
|
||||
.iterator();
|
||||
}
|
||||
|
||||
private Relation rel(String source, String target, String relClass, DedupConfig dedupConf) {
|
||||
// TODO NEED to REVIEW THIS FUNCTION, THE UTILITY FUNCTION SHOULD BE MOVED ON SOME SUPPORT CLASS OR REUSE OTHER FUNCTION
|
||||
private Relation rel(String source, String target, Relation.RELCLASS relClass, DedupConfig dedupConf) {
|
||||
|
||||
String entityType = dedupConf.getWf().getEntityType();
|
||||
|
||||
|
@ -214,8 +215,8 @@ public class SparkCreateMergeRels extends AbstractSparkAction {
|
|||
r.setSource(source);
|
||||
r.setTarget(target);
|
||||
r.setRelClass(relClass);
|
||||
r.setRelType(entityType + entityType.substring(0, 1).toUpperCase() + entityType.substring(1));
|
||||
r.setSubRelType(ModelConstants.DEDUP);
|
||||
r.setRelType(Relation.RELTYPE.valueOf(entityType + entityType.substring(0, 1).toUpperCase() + entityType.substring(1)));
|
||||
r.setSubRelType(Relation.SUBRELTYPE.dedup);
|
||||
|
||||
DataInfo info = new DataInfo();
|
||||
|
||||
|
|
|
@ -191,15 +191,15 @@ public class SparkPrepareNewOrgs extends AbstractSparkAction {
|
|||
|
||||
switch (entityType) {
|
||||
case "result":
|
||||
if (rel.getRelClass().equals(ModelConstants.IS_DIFFERENT_FROM)
|
||||
&& rel.getRelType().equals(ModelConstants.RESULT_RESULT)
|
||||
&& rel.getSubRelType().equals(ModelConstants.DEDUP))
|
||||
if (rel.getRelClass().equals(Relation.RELCLASS.isDifferentFrom)
|
||||
&& rel.getRelType().equals(Relation.RELTYPE.resultResult)
|
||||
&& rel.getSubRelType().equals(Relation.SUBRELTYPE.dedup))
|
||||
return true;
|
||||
break;
|
||||
case "organization":
|
||||
if (rel.getRelClass().equals(ModelConstants.IS_DIFFERENT_FROM)
|
||||
&& rel.getRelType().equals(ModelConstants.ORG_ORG_RELTYPE)
|
||||
&& rel.getSubRelType().equals(ModelConstants.DEDUP))
|
||||
if (rel.getRelClass().equals(Relation.RELCLASS.isDifferentFrom)
|
||||
&& rel.getRelType().equals(Relation.RELTYPE.organizationOrganization)
|
||||
&& rel.getSubRelType().equals(Relation.SUBRELTYPE.dedup))
|
||||
return true;
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -106,15 +106,15 @@ public class SparkPrepareOrgRels extends AbstractSparkAction {
|
|||
|
||||
switch (entityType) {
|
||||
case "result":
|
||||
if (rel.getRelClass().equals(ModelConstants.IS_DIFFERENT_FROM)
|
||||
&& rel.getRelType().equals(ModelConstants.RESULT_RESULT)
|
||||
&& rel.getSubRelType().equals(ModelConstants.DEDUP))
|
||||
if (rel.getRelClass().equals(Relation.RELCLASS.isDifferentFrom)
|
||||
&& rel.getRelType().equals(Relation.RELTYPE.resultResult)
|
||||
&& rel.getSubRelType().equals(Relation.SUBRELTYPE.dedup))
|
||||
return true;
|
||||
break;
|
||||
case "organization":
|
||||
if (rel.getRelClass().equals(ModelConstants.IS_DIFFERENT_FROM)
|
||||
&& rel.getRelType().equals(ModelConstants.ORG_ORG_RELTYPE)
|
||||
&& rel.getSubRelType().equals(ModelConstants.DEDUP))
|
||||
if (rel.getRelClass().equals(Relation.RELCLASS.isDifferentFrom)
|
||||
&& rel.getRelType().equals(Relation.RELTYPE.organizationOrganization)
|
||||
&& rel.getSubRelType().equals(Relation.SUBRELTYPE.dedup))
|
||||
return true;
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -74,7 +74,7 @@ public class SparkPropagateRelation extends AbstractSparkAction {
|
|||
|
||||
// <mergedObjectID, dedupID>
|
||||
Dataset<Tuple2<String, String>> mergedIds = mergeRels
|
||||
.where(col("relClass").equalTo(ModelConstants.MERGES))
|
||||
.where(col("relClass").equalTo(Relation.RELCLASS.merges))
|
||||
.select(col("source"), col("target"))
|
||||
.distinct()
|
||||
.map(
|
||||
|
@ -111,7 +111,7 @@ public class SparkPropagateRelation extends AbstractSparkAction {
|
|||
.filter(getRelationFilterFunction())
|
||||
.groupByKey(
|
||||
(MapFunction<Relation, String>) r -> String
|
||||
.join(r.getSource(), r.getTarget(), r.getRelType(), r.getSubRelType(), r.getRelClass()),
|
||||
.join(r.getSource(), r.getTarget(), r.getRelType().toString(), r.getSubRelType().toString(), r.getRelClass().toString()),
|
||||
Encoders.STRING())
|
||||
.agg(new RelationAggregator().toColumn())
|
||||
.map((MapFunction<Tuple2<String, Relation>, Relation>) Tuple2::_2, Encoders.bean(Relation.class));
|
||||
|
@ -150,9 +150,9 @@ public class SparkPropagateRelation extends AbstractSparkAction {
|
|||
private FilterFunction<Relation> getRelationFilterFunction() {
|
||||
return r -> StringUtils.isNotBlank(r.getSource()) ||
|
||||
StringUtils.isNotBlank(r.getTarget()) ||
|
||||
StringUtils.isNotBlank(r.getRelType()) ||
|
||||
StringUtils.isNotBlank(r.getSubRelType()) ||
|
||||
StringUtils.isNotBlank(r.getRelClass());
|
||||
r.getRelType() != null ||
|
||||
r.getSubRelType()!=null ||
|
||||
r.getRelClass()!=null;
|
||||
}
|
||||
|
||||
private static String getId(Relation r, FieldType type) {
|
||||
|
|
|
@ -415,9 +415,9 @@ public class SparkDedupTest implements Serializable {
|
|||
"50|doi_________::d5021b53204e4fdeab6ff5d5bc468032",
|
||||
"50|arXiv_______::c93aeb433eb90ed7a86e29be00791b7c");
|
||||
merges.forEach(r -> {
|
||||
assertEquals(ModelConstants.RESULT_RESULT, r.getRelType());
|
||||
assertEquals(ModelConstants.DEDUP, r.getSubRelType());
|
||||
assertEquals(ModelConstants.MERGES, r.getRelClass());
|
||||
assertEquals(Relation.RELTYPE.resultResult, r.getRelType());
|
||||
assertEquals(Relation.SUBRELTYPE.dedup, r.getSubRelType());
|
||||
assertEquals(Relation.RELCLASS.merges, r.getRelClass());
|
||||
assertTrue(dups.contains(r.getTarget()));
|
||||
});
|
||||
|
||||
|
@ -426,9 +426,9 @@ public class SparkDedupTest implements Serializable {
|
|||
.collectAsList();
|
||||
assertEquals(3, mergedIn.size());
|
||||
mergedIn.forEach(r -> {
|
||||
assertEquals(ModelConstants.RESULT_RESULT, r.getRelType());
|
||||
assertEquals(ModelConstants.DEDUP, r.getSubRelType());
|
||||
assertEquals(ModelConstants.IS_MERGED_IN, r.getRelClass());
|
||||
assertEquals(Relation.RELTYPE.resultResult, r.getRelType());
|
||||
assertEquals(Relation.SUBRELTYPE.dedup, r.getSubRelType());
|
||||
assertEquals(Relation.RELCLASS.isMergedIn, r.getRelClass());
|
||||
assertTrue(dups.contains(r.getSource()));
|
||||
});
|
||||
|
||||
|
|
|
@ -190,9 +190,9 @@ public class SparkPublicationRootsTest implements Serializable {
|
|||
"50|doi_________::d5021b53204e4fdeab6ff5d5bc468032",
|
||||
"50|arXiv_______::c93aeb433eb90ed7a86e29be00791b7c");
|
||||
mergeList.forEach(r -> {
|
||||
assertEquals(ModelConstants.RESULT_RESULT, r.getRelType());
|
||||
assertEquals(ModelConstants.DEDUP, r.getSubRelType());
|
||||
assertEquals(ModelConstants.MERGES, r.getRelClass());
|
||||
assertEquals(Relation.RELTYPE.resultResult, r.getRelType());
|
||||
assertEquals(Relation.SUBRELTYPE.dedup, r.getSubRelType());
|
||||
assertEquals(Relation.RELCLASS.merges, r.getRelClass());
|
||||
assertTrue(dups.contains(r.getTarget()));
|
||||
});
|
||||
|
||||
|
@ -201,9 +201,9 @@ public class SparkPublicationRootsTest implements Serializable {
|
|||
.collectAsList();
|
||||
assertEquals(3, mergedIn.size());
|
||||
mergedIn.forEach(r -> {
|
||||
assertEquals(ModelConstants.RESULT_RESULT, r.getRelType());
|
||||
assertEquals(ModelConstants.DEDUP, r.getSubRelType());
|
||||
assertEquals(ModelConstants.IS_MERGED_IN, r.getRelClass());
|
||||
assertEquals(Relation.RELTYPE.resultResult, r.getRelType());
|
||||
assertEquals(Relation.SUBRELTYPE.dedup, r.getSubRelType());
|
||||
assertEquals(Relation.RELCLASS.isMergedIn, r.getRelClass());
|
||||
assertTrue(dups.contains(r.getSource()));
|
||||
});
|
||||
|
||||
|
|
|
@ -384,9 +384,9 @@ case object Crossref2Oaf {
|
|||
val rel = new Relation
|
||||
rel.setSource(sourceId)
|
||||
rel.setTarget(targetId)
|
||||
rel.setRelType(ModelConstants.RESULT_RESULT)
|
||||
rel.setRelClass(ModelConstants.CITES)
|
||||
rel.setSubRelType(ModelConstants.CITATION)
|
||||
rel.setRelType(Relation.RELTYPE.resultResult)
|
||||
rel.setRelClass(Relation.RELCLASS.Cites)
|
||||
rel.setSubRelType(Relation.SUBRELTYPE.citation)
|
||||
rel.setProvenance(Lists.newArrayList(OafMapperUtils.getProvenance(collectedFrom, dataInfo)))
|
||||
|
||||
List(rel)
|
||||
|
@ -417,14 +417,14 @@ case object Crossref2Oaf {
|
|||
null
|
||||
}
|
||||
|
||||
def generateRelation(sourceId: String, targetId: String, relClass: String): Relation = {
|
||||
def generateRelation(sourceId: String, targetId: String, relClass: Relation.RELCLASS): Relation = {
|
||||
|
||||
val r = new Relation
|
||||
r.setSource(sourceId)
|
||||
r.setTarget(targetId)
|
||||
r.setRelType(ModelConstants.RESULT_PROJECT)
|
||||
r.setRelType(Relation.RELTYPE.resultProject)
|
||||
r.setRelClass(relClass)
|
||||
r.setSubRelType(ModelConstants.OUTCOME)
|
||||
r.setSubRelType(Relation.SUBRELTYPE.outcome)
|
||||
|
||||
r.setProvenance(Lists.newArrayList(OafMapperUtils.getProvenance(collectedFrom, dataInfo)))
|
||||
r
|
||||
|
@ -435,14 +435,15 @@ case object Crossref2Oaf {
|
|||
nsPrefix: String,
|
||||
extractField: String => String
|
||||
): Unit = {
|
||||
|
||||
if (funder.award.isDefined && funder.award.get.nonEmpty)
|
||||
funder.award.get
|
||||
.map(extractField)
|
||||
.filter(a => a != null && a.nonEmpty)
|
||||
.foreach(award => {
|
||||
val targetId = getProjectId(nsPrefix, DHPUtils.md5(award))
|
||||
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
|
||||
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
|
||||
queue += generateRelation(sourceId, targetId, Relation.RELCLASS.isProducedBy)
|
||||
queue += generateRelation(targetId, sourceId, Relation.RELCLASS.produces)
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -471,21 +472,21 @@ case object Crossref2Oaf {
|
|||
case "10.13039/501100000923" => generateSimpleRelationFromAward(funder, "arc_________", a => a)
|
||||
case "10.13039/501100000038" =>
|
||||
val targetId = getProjectId("nserc_______", "1e5e62235d094afd01cd56e65112fc63")
|
||||
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
|
||||
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
|
||||
queue += generateRelation(sourceId, targetId, Relation.RELCLASS.isProducedBy)
|
||||
queue += generateRelation(targetId, sourceId, Relation.RELCLASS.produces)
|
||||
case "10.13039/501100000155" =>
|
||||
val targetId = getProjectId("sshrc_______", "1e5e62235d094afd01cd56e65112fc63")
|
||||
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
|
||||
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
|
||||
queue += generateRelation(sourceId, targetId, Relation.RELCLASS.isProducedBy)
|
||||
queue += generateRelation(targetId, sourceId, Relation.RELCLASS.produces)
|
||||
case "10.13039/501100000024" =>
|
||||
val targetId = getProjectId("cihr________", "1e5e62235d094afd01cd56e65112fc63")
|
||||
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
|
||||
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
|
||||
queue += generateRelation(sourceId, targetId, Relation.RELCLASS.isProducedBy)
|
||||
queue += generateRelation(targetId, sourceId, Relation.RELCLASS.produces)
|
||||
|
||||
case "10.13039/100020031" =>
|
||||
val targetId = getProjectId("tara________", "1e5e62235d094afd01cd56e65112fc63")
|
||||
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
|
||||
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
|
||||
queue += generateRelation(sourceId, targetId, Relation.RELCLASS.isProducedBy)
|
||||
queue += generateRelation(targetId, sourceId, Relation.RELCLASS.produces)
|
||||
|
||||
case "10.13039/501100005416" => generateSimpleRelationFromAward(funder, "rcn_________", a => a)
|
||||
case "10.13039/501100002848" => generateSimpleRelationFromAward(funder, "conicytf____", a => a)
|
||||
|
@ -495,8 +496,8 @@ case object Crossref2Oaf {
|
|||
case "10.13039/501100003407" =>
|
||||
generateSimpleRelationFromAward(funder, "miur________", a => a)
|
||||
val targetId = getProjectId("miur________", "1e5e62235d094afd01cd56e65112fc63")
|
||||
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
|
||||
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
|
||||
queue += generateRelation(sourceId, targetId, Relation.RELCLASS.isProducedBy)
|
||||
queue += generateRelation(targetId, sourceId, Relation.RELCLASS.produces)
|
||||
case "10.13039/501100006588" | "10.13039/501100004488" =>
|
||||
generateSimpleRelationFromAward(
|
||||
funder,
|
||||
|
@ -509,15 +510,15 @@ case object Crossref2Oaf {
|
|||
case "10.13039/100004440" =>
|
||||
generateSimpleRelationFromAward(funder, "wt__________", a => a)
|
||||
val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63")
|
||||
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
|
||||
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
|
||||
queue += generateRelation(sourceId, targetId, Relation.RELCLASS.isProducedBy)
|
||||
queue += generateRelation(targetId, sourceId, Relation.RELCLASS.produces)
|
||||
//ASAP
|
||||
case "10.13039/100018231" => generateSimpleRelationFromAward(funder, "asap________", a => a)
|
||||
//CHIST-ERA
|
||||
case "10.13039/501100001942" =>
|
||||
val targetId = getProjectId("chistera____", "1e5e62235d094afd01cd56e65112fc63")
|
||||
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
|
||||
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
|
||||
queue += generateRelation(sourceId, targetId, Relation.RELCLASS.isProducedBy)
|
||||
queue += generateRelation(targetId, sourceId, Relation.RELCLASS.produces)
|
||||
//HE
|
||||
case "10.13039/100018693" | "10.13039/100018694" | "10.13039/100019188" | "10.13039/100019180" |
|
||||
"10.13039/100018695" | "10.13039/100019185" | "10.13039/100019186" | "10.13039/100019187" =>
|
||||
|
@ -559,8 +560,8 @@ case object Crossref2Oaf {
|
|||
case "Wellcome Trust Masters Fellowship" =>
|
||||
generateSimpleRelationFromAward(funder, "wt__________", a => a)
|
||||
val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63")
|
||||
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
|
||||
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
|
||||
queue += generateRelation(sourceId, targetId, Relation.RELCLASS.isProducedBy)
|
||||
queue += generateRelation(targetId, sourceId, Relation.RELCLASS.produces)
|
||||
case _ => logger.debug("no match for " + funder.name)
|
||||
|
||||
}
|
||||
|
|
|
@ -144,7 +144,7 @@ class CrossrefMappingTest {
|
|||
val relationList: List[Relation] = result
|
||||
.filter(s => s.isInstanceOf[Relation])
|
||||
.map(r => r.asInstanceOf[Relation])
|
||||
.filter(r => r.getSubRelType.equalsIgnoreCase(ModelConstants.CITATION))
|
||||
.filter(r => r.getSubRelType.equalsIgnoreCase(Relation.SUBRELTYPE.citation))
|
||||
|
||||
assertNotNull(relationList)
|
||||
assertFalse(relationList.isEmpty)
|
||||
|
|
|
@ -112,15 +112,16 @@ public class PropagationConstant {
|
|||
String className
|
||||
|
||||
) {
|
||||
|
||||
ArrayList<Relation> newRelations = new ArrayList();
|
||||
newRelations
|
||||
.add(
|
||||
getRelation(
|
||||
orgId,
|
||||
resultId,
|
||||
ModelConstants.IS_AUTHOR_INSTITUTION_OF,
|
||||
ModelConstants.RESULT_ORGANIZATION,
|
||||
ModelConstants.AFFILIATION,
|
||||
Relation.RELCLASS.isAuthorInstitutionOf,
|
||||
Relation.RELTYPE.resultOrganization,
|
||||
Relation.SUBRELTYPE.affiliation,
|
||||
PROPAGATION_DATA_INFO_TYPE,
|
||||
classID,
|
||||
className));
|
||||
|
@ -129,9 +130,9 @@ public class PropagationConstant {
|
|||
getRelation(
|
||||
resultId,
|
||||
orgId,
|
||||
ModelConstants.HAS_AUTHOR_INSTITUTION,
|
||||
ModelConstants.RESULT_ORGANIZATION,
|
||||
ModelConstants.AFFILIATION,
|
||||
Relation.RELCLASS.hasAuthorInstitution,
|
||||
Relation.RELTYPE.resultOrganization,
|
||||
Relation.SUBRELTYPE.affiliation,
|
||||
PROPAGATION_DATA_INFO_TYPE,
|
||||
classID,
|
||||
className));
|
||||
|
@ -142,9 +143,9 @@ public class PropagationConstant {
|
|||
public static Relation getRelation(
|
||||
String source,
|
||||
String target,
|
||||
String rel_class,
|
||||
String rel_type,
|
||||
String subrel_type,
|
||||
Relation.RELCLASS rel_class,
|
||||
Relation.RELTYPE rel_type,
|
||||
Relation.SUBRELTYPE subrel_type,
|
||||
String inference_provenance,
|
||||
String inference_class_id,
|
||||
String inference_class_name) {
|
||||
|
|
|
@ -96,7 +96,7 @@ public class PrepareDatasourceCountryAssociation {
|
|||
// filtering of the relations taking the non deleted by inference and those with IsProvidedBy as relclass
|
||||
Dataset<Relation> relation = readPath(spark, inputPath + "/relation", Relation.class)
|
||||
.filter(
|
||||
(FilterFunction<Relation>) rel -> rel.getRelClass().equalsIgnoreCase(ModelConstants.IS_PROVIDED_BY));
|
||||
(FilterFunction<Relation>) rel -> rel.getRelClass() == Relation.RELCLASS.isProvidedBy);
|
||||
|
||||
// filtering of the organization taking only the non deleted by inference and those with information about the
|
||||
// country
|
||||
|
|
|
@ -81,7 +81,7 @@ public class PrepareProjectResultsAssociation {
|
|||
+ " FROM relation "
|
||||
+ " WHERE datainfo.deletedbyinference = false "
|
||||
+ " AND lower(relClass) = '"
|
||||
+ ModelConstants.IS_PRODUCED_BY.toLowerCase()
|
||||
+ Relation.RELCLASS.isProducedBy.toString().toLowerCase()
|
||||
+ "'";
|
||||
|
||||
Dataset<Row> resproj_relation = spark.sql(resproj_relation_query);
|
||||
|
|
|
@ -112,9 +112,9 @@ public class SparkResultToProjectThroughSemRelJob {
|
|||
getRelation(
|
||||
projectId,
|
||||
resId,
|
||||
ModelConstants.PRODUCES,
|
||||
ModelConstants.RESULT_PROJECT,
|
||||
ModelConstants.OUTCOME,
|
||||
Relation.RELCLASS.produces,
|
||||
Relation.RELTYPE.resultProject,
|
||||
Relation.SUBRELTYPE.outcome,
|
||||
PROPAGATION_DATA_INFO_TYPE,
|
||||
PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID,
|
||||
PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME)));
|
||||
|
|
|
@ -78,13 +78,13 @@ public class PrepareResultCommunitySet {
|
|||
+ " FROM relation "
|
||||
+ " WHERE datainfo.deletedbyinference = false "
|
||||
+ " AND lower(relClass) = '"
|
||||
+ ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase()
|
||||
+ Relation.RELCLASS.hasAuthorInstitution.toString().toLowerCase()
|
||||
+ "') result_organization "
|
||||
+ "LEFT JOIN (SELECT source, collect_set(target) org_set "
|
||||
+ " FROM relation "
|
||||
+ " WHERE datainfo.deletedbyinference = false "
|
||||
+ " AND lower(relClass) = '"
|
||||
+ ModelConstants.MERGES.toLowerCase()
|
||||
+ Relation.RELCLASS.merges.toString().toLowerCase()
|
||||
+ "' "
|
||||
+ " GROUP BY source) organization_organization "
|
||||
+ "ON result_organization.target = organization_organization.source ";
|
||||
|
|
|
@ -109,7 +109,7 @@ public class PrepareResultInstRepoAssociation {
|
|||
+ "JOIN ( SELECT source, target "
|
||||
+ "FROM relation "
|
||||
+ "WHERE lower(relclass) = '"
|
||||
+ ModelConstants.IS_PROVIDED_BY.toLowerCase()
|
||||
+ Relation.RELCLASS.isProvidedBy.toString().toLowerCase()
|
||||
+ "' "
|
||||
+ "AND datainfo.deletedbyinference = false ) rel "
|
||||
+ "ON d.id = rel.source ";
|
||||
|
@ -129,7 +129,7 @@ public class PrepareResultInstRepoAssociation {
|
|||
+ "from relation "
|
||||
+ "where datainfo.deletedbyinference = false "
|
||||
+ "and lower(relClass) = '"
|
||||
+ ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase()
|
||||
+ Relation.RELCLASS.hasAuthorInstitution.toString().toLowerCase()
|
||||
+ "' "
|
||||
+ "group by source";
|
||||
|
||||
|
|
|
@ -142,9 +142,9 @@ public class SparkResultToOrganizationFromIstRepoJob {
|
|||
.add(
|
||||
getRelation(
|
||||
resultId, orgId,
|
||||
ModelConstants.HAS_AUTHOR_INSTITUTION,
|
||||
ModelConstants.RESULT_ORGANIZATION,
|
||||
ModelConstants.AFFILIATION, PROPAGATION_DATA_INFO_TYPE,
|
||||
Relation.RELCLASS.hasAuthorInstitution,
|
||||
Relation.RELTYPE.resultOrganization,
|
||||
Relation.SUBRELTYPE.affiliation, PROPAGATION_DATA_INFO_TYPE,
|
||||
PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID,
|
||||
PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME))
|
||||
|
||||
|
|
|
@ -36,14 +36,14 @@ public class PrepareInfo implements Serializable {
|
|||
private static final String ORGANIZATION_ORGANIZATION_QUERY = "SELECT target key, collect_set(source) as valueSet "
|
||||
+
|
||||
"FROM relation " +
|
||||
"WHERE lower(relclass) = '" + ModelConstants.IS_PARENT_OF.toLowerCase() +
|
||||
"WHERE lower(relclass) = '" + Relation.RELCLASS.IsParentOf.toString().toLowerCase() +
|
||||
"' and datainfo.deletedbyinference = false " +
|
||||
"GROUP BY target";
|
||||
|
||||
// associates results with all the orgs they are affiliated to
|
||||
private static final String RESULT_ORGANIZATION_QUERY = "SELECT source key, collect_set(target) as valueSet " +
|
||||
"FROM relation " +
|
||||
"WHERE lower(relclass) = '" + ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase() +
|
||||
"WHERE lower(relclass) = '" + Relation.RELCLASS.hasAuthorInstitution.toString().toLowerCase() +
|
||||
"' and datainfo.deletedbyinference = false " +
|
||||
"GROUP BY source";
|
||||
|
||||
|
@ -115,7 +115,7 @@ public class PrepareInfo implements Serializable {
|
|||
|
||||
relation
|
||||
.filter(
|
||||
(FilterFunction<Relation>) r -> r.getRelClass().equals(ModelConstants.HAS_AUTHOR_INSTITUTION))
|
||||
(FilterFunction<Relation>) r -> r.getRelClass().equals(Relation.RELCLASS.hasAuthorInstitution))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
|
@ -124,14 +124,14 @@ public class PrepareInfo implements Serializable {
|
|||
Dataset<String> children = spark
|
||||
.sql(
|
||||
"Select distinct target as child from relation where " +
|
||||
"lower(relclass)='" + ModelConstants.IS_PARENT_OF.toLowerCase() +
|
||||
"lower(relclass)='" + Relation.RELCLASS.IsParentOf.toString().toLowerCase() +
|
||||
"' and datainfo.deletedbyinference = false")
|
||||
.as(Encoders.STRING());
|
||||
|
||||
Dataset<String> parent = spark
|
||||
.sql(
|
||||
"Select distinct source as parent from relation " +
|
||||
"where lower(relclass)='" + ModelConstants.IS_PARENT_OF.toLowerCase() +
|
||||
"where lower(relclass)='" + Relation.RELCLASS.IsParentOf.toString().toLowerCase() +
|
||||
"' and datainfo.deletedbyinference = false")
|
||||
.as(Encoders.STRING());
|
||||
|
||||
|
|
|
@ -193,9 +193,9 @@ public class StepActions implements Serializable {
|
|||
orgId -> getRelation(
|
||||
v.getKey(),
|
||||
orgId,
|
||||
ModelConstants.HAS_AUTHOR_INSTITUTION,
|
||||
ModelConstants.RESULT_ORGANIZATION,
|
||||
ModelConstants.AFFILIATION,
|
||||
Relation.RELCLASS.hasAuthorInstitution,
|
||||
Relation.RELTYPE.resultOrganization,
|
||||
Relation.SUBRELTYPE.affiliation,
|
||||
PROPAGATION_DATA_INFO_TYPE,
|
||||
PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_ID,
|
||||
PROPAGATION_RELATION_RESULT_ORGANIZATION_SEM_REL_CLASS_NAME))
|
||||
|
|
|
@ -119,8 +119,8 @@ public class SparkJobTest {
|
|||
tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
|
||||
|
||||
Assertions.assertEquals(18, tmp.count());
|
||||
tmp.foreach(r -> Assertions.assertEquals(ModelConstants.AFFILIATION, r.getSubRelType()));
|
||||
tmp.foreach(r -> Assertions.assertEquals(ModelConstants.RESULT_ORGANIZATION, r.getRelType()));
|
||||
tmp.foreach(r -> Assertions.assertEquals(Relation.SUBRELTYPE.affiliation, r.getSubRelType()));
|
||||
tmp.foreach(r -> Assertions.assertEquals(Relation.RELTYPE.resultOrganization, r.getRelType()));
|
||||
tmp
|
||||
.foreach(
|
||||
r -> Assertions
|
||||
|
@ -148,7 +148,7 @@ public class SparkJobTest {
|
|||
Assertions.assertEquals(9, tmp.filter(r -> r.getSource().substring(0, 3).equals("50|")).count());
|
||||
tmp
|
||||
.filter(r -> r.getSource().substring(0, 3).equals("50|"))
|
||||
.foreach(r -> Assertions.assertEquals(ModelConstants.HAS_AUTHOR_INSTITUTION, r.getRelClass()));
|
||||
.foreach(r -> Assertions.assertEquals(Relation.RELCLASS.hasAuthorInstitution, r.getRelClass()));
|
||||
Assertions
|
||||
.assertEquals(
|
||||
2, tmp.filter(r -> r.getSource().equals("50|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count());
|
||||
|
@ -168,7 +168,7 @@ public class SparkJobTest {
|
|||
Assertions.assertEquals(9, tmp.filter(r -> r.getSource().substring(0, 3).equals("20|")).count());
|
||||
tmp
|
||||
.filter(r -> r.getSource().substring(0, 3).equals("20|"))
|
||||
.foreach(r -> Assertions.assertEquals(ModelConstants.IS_AUTHOR_INSTITUTION_OF, r.getRelClass()));
|
||||
.foreach(r -> Assertions.assertEquals(Relation.RELCLASS.isAuthorInstitutionOf, r.getRelClass()));
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, tmp.filter(r -> r.getSource().equals("20|doajarticles::1cae0b82b56ccd97c2db1f698def7074")).count());
|
||||
|
|
|
@ -117,17 +117,17 @@ public class StepActionsTest {
|
|||
verificationDs
|
||||
.foreach(
|
||||
(ForeachFunction<Relation>) r -> Assertions
|
||||
.assertEquals(ModelConstants.HAS_AUTHOR_INSTITUTION, r.getRelClass()));
|
||||
.assertEquals(Relation.RELCLASS.hasAuthorInstitution, r.getRelClass()));
|
||||
|
||||
verificationDs
|
||||
.foreach(
|
||||
(ForeachFunction<Relation>) r -> Assertions
|
||||
.assertEquals(ModelConstants.RESULT_ORGANIZATION, r.getRelType()));
|
||||
.assertEquals(Relation.RELTYPE.resultOrganization, r.getRelType()));
|
||||
|
||||
verificationDs
|
||||
.foreach(
|
||||
(ForeachFunction<Relation>) r -> Assertions
|
||||
.assertEquals(ModelConstants.AFFILIATION, r.getSubRelType()));
|
||||
.assertEquals(Relation.SUBRELTYPE.affiliation, r.getSubRelType()));
|
||||
|
||||
verificationDs
|
||||
.foreach(
|
||||
|
|
|
@ -89,7 +89,7 @@ public class GetDatasourceFromCountry implements Serializable {
|
|||
(MapFunction<String, Relation>) value -> OBJECT_MAPPER.readValue(value, Relation.class),
|
||||
Encoders.bean(Relation.class))
|
||||
.filter(
|
||||
(FilterFunction<Relation>) rel -> rel.getRelClass().equalsIgnoreCase(ModelConstants.IS_PROVIDED_BY));
|
||||
(FilterFunction<Relation>) rel -> rel.getRelClass().equalsIgnoreCase(Relation.RELCLASS.isProvidedBy));
|
||||
|
||||
organization
|
||||
.joinWith(relation, organization.col("id").equalTo(relation.col("target")))
|
||||
|
|
|
@ -1,33 +1,26 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.raw;
|
||||
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.REPOSITORY_PROVENANCE_ACTIONS;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.UNKNOWN;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory.*;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Sets;
|
||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Entity;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.validator.routines.UrlValidator;
|
||||
import org.dom4j.*;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Sets;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory.createOpenaireId;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
|
||||
|
||||
public abstract class AbstractMdRecordToOafMapper {
|
||||
|
||||
|
@ -261,7 +254,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
res
|
||||
.add(
|
||||
OafMapperUtils
|
||||
.getRelation(projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, entity, validationdDate));
|
||||
.getRelation(projectId, docId, Relation.RELTYPE.resultProject, Relation.SUBRELTYPE.outcome, Relation.RELCLASS.produces, entity, validationdDate));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -276,12 +269,11 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
Element element = (Element) o;
|
||||
|
||||
final String target = StringUtils.trim(element.getText());
|
||||
final String relType = element.attributeValue("relType");
|
||||
final String subRelType = element.attributeValue("subRelType");
|
||||
final String relClass = element.attributeValue("relClass");
|
||||
final Relation.RELTYPE relType = Relation.RELTYPE.valueOf(element.attributeValue("relType"));
|
||||
final Relation.SUBRELTYPE subRelType = Relation.SUBRELTYPE.valueOf(element.attributeValue("subRelType"));
|
||||
final Relation.RELCLASS relClass = Relation.RELCLASS.lookUp(element.attributeValue("relClass"));
|
||||
|
||||
if (StringUtils.isNotBlank(target) && StringUtils.isNotBlank(relType) && StringUtils.isNotBlank(subRelType)
|
||||
&& StringUtils.isNotBlank(relClass)) {
|
||||
if (StringUtils.isNotBlank(target)) {
|
||||
|
||||
final String validationdDate = ((Node) o).valueOf("@validationDate");
|
||||
|
||||
|
|
|
@ -90,6 +90,8 @@ object SparkResolveEntities {
|
|||
case EntityType.dataset => mapper.readValue(input, classOf[OafDataset])
|
||||
case EntityType.software => mapper.readValue(input, classOf[Software])
|
||||
case EntityType.otherresearchproduct => mapper.readValue(input, classOf[OtherResearchProduct])
|
||||
case _ => throw new IllegalArgumentException(s"Unexpected entity type $entity")
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -104,24 +104,12 @@ object SparkConvertRDDtoDataset {
|
|||
|
||||
log.info("Converting Relation")
|
||||
|
||||
val relClassFilter = List(
|
||||
ModelConstants.MERGES,
|
||||
ModelConstants.IS_MERGED_IN,
|
||||
ModelConstants.HAS_AMONG_TOP_N_SIMILAR_DOCS,
|
||||
ModelConstants.IS_AMONG_TOP_N_SIMILAR_DOCS
|
||||
)
|
||||
|
||||
val rddRelation = spark.sparkContext
|
||||
.textFile(s"$sourcePath/relation")
|
||||
.map(s => mapper.readValue(s, classOf[Relation]))
|
||||
.filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50"))
|
||||
.filter(r => filterRelations(r))
|
||||
//filter OpenCitations relations
|
||||
// .filter(r =>
|
||||
// r.getDataInfo.getProvenanceaction != null &&
|
||||
// !"sysimport:crosswalk:opencitations".equals(r.getDataInfo.getProvenanceaction.getClassid)
|
||||
// )
|
||||
|
||||
spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath")
|
||||
}
|
||||
|
||||
|
@ -133,12 +121,12 @@ object SparkConvertRDDtoDataset {
|
|||
*/
|
||||
|
||||
val relClassFilter = List(
|
||||
ModelConstants.MERGES,
|
||||
ModelConstants.IS_MERGED_IN,
|
||||
ModelConstants.HAS_AMONG_TOP_N_SIMILAR_DOCS,
|
||||
ModelConstants.IS_AMONG_TOP_N_SIMILAR_DOCS
|
||||
Relation.RELCLASS.merges,
|
||||
Relation.RELCLASS.isMergedIn,
|
||||
Relation.RELCLASS.HasAmongTopNSimilarDocuments,
|
||||
Relation.RELCLASS.IsAmongTopNSimilarDocuments
|
||||
)
|
||||
if (relClassFilter.exists(k => k.equalsIgnoreCase(r.getRelClass)))
|
||||
if (relClassFilter.contains(r.getRelClass))
|
||||
false
|
||||
else {
|
||||
if (r.getProvenance == null || r.getProvenance.isEmpty)
|
||||
|
|
|
@ -112,7 +112,6 @@ object SparkCreateInputGraph {
|
|||
log.info(s"Extract ${clazz.getSimpleName}")
|
||||
oafDs
|
||||
.filter(o => o.isInstanceOf[T])
|
||||
.map(p => p.asInstanceOf[T])
|
||||
.write
|
||||
.mode(SaveMode.Overwrite)
|
||||
.save(targetPath)
|
||||
|
|
|
@ -51,7 +51,7 @@ object SparkCreateScholix {
|
|||
val relationDS: Dataset[(String, Relation)] = spark.read
|
||||
.load(relationPath)
|
||||
.as[Relation]
|
||||
.filter(r => !r.getRelClass.toLowerCase.contains("merge"))
|
||||
.filter(r => !r.getRelClass.toString.toLowerCase.contains("merge"))
|
||||
.map(r => (r.getSource, r))(Encoders.tuple(Encoders.STRING, relEncoder))
|
||||
|
||||
val summaryDS: Dataset[(String, ScholixSummary)] = spark.read
|
||||
|
|
|
@ -1,17 +1,13 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.raw;
|
||||
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions.cleanup;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions.fixVocabularyNames;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import static org.mockito.Mockito.lenient;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.dom4j.DocumentException;
|
||||
|
@ -21,14 +17,16 @@ import org.junit.jupiter.api.extension.ExtendWith;
|
|||
import org.mockito.Mock;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions.cleanup;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions.fixVocabularyNames;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import static org.mockito.Mockito.lenient;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
class MappersTest {
|
||||
|
@ -129,7 +127,7 @@ class MappersTest {
|
|||
.stream()
|
||||
.filter(o -> o instanceof Relation)
|
||||
.map(o -> (Relation) o)
|
||||
.filter(r -> ModelConstants.RESULT_PROJECT.equals(r.getRelType()))
|
||||
.filter(r -> Relation.RELTYPE.resultProject.equals(r.getRelType()))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
assertEquals(2, resultProject.size());
|
||||
|
@ -152,7 +150,7 @@ class MappersTest {
|
|||
.stream()
|
||||
.filter(o -> o instanceof Relation)
|
||||
.map(o -> (Relation) o)
|
||||
.filter(r -> ModelConstants.RESULT_ORGANIZATION.equals(r.getRelType()))
|
||||
.filter(r -> Relation.RELTYPE.resultOrganization.equals(r.getRelType()))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
assertEquals(2, affiliation.size());
|
||||
|
@ -297,17 +295,17 @@ class MappersTest {
|
|||
|
||||
assertEquals(d.getId(), r1.getSource());
|
||||
assertEquals("40|corda_______::e06332dee33bec6c2ba4c98601053229", r1.getTarget());
|
||||
assertEquals(ModelConstants.RESULT_PROJECT, r1.getRelType());
|
||||
assertEquals(ModelConstants.OUTCOME, r1.getSubRelType());
|
||||
assertEquals(ModelConstants.IS_PRODUCED_BY, r1.getRelClass());
|
||||
assertEquals(Relation.RELTYPE.resultProject, r1.getRelType());
|
||||
assertEquals(Relation.SUBRELTYPE.outcome, r1.getSubRelType());
|
||||
assertEquals(Relation.RELCLASS.isProducedBy, r1.getRelClass());
|
||||
assertTrue(r1.getValidated());
|
||||
assertEquals("2020-01-01", r1.getValidationDate());
|
||||
|
||||
assertEquals(d.getId(), r2.getTarget());
|
||||
assertEquals("40|corda_______::e06332dee33bec6c2ba4c98601053229", r2.getSource());
|
||||
assertEquals(ModelConstants.RESULT_PROJECT, r2.getRelType());
|
||||
assertEquals(ModelConstants.OUTCOME, r2.getSubRelType());
|
||||
assertEquals(ModelConstants.PRODUCES, r2.getRelClass());
|
||||
assertEquals(Relation.RELTYPE.resultProject, r2.getRelType());
|
||||
assertEquals(Relation.SUBRELTYPE.outcome, r2.getSubRelType());
|
||||
assertEquals(Relation.RELCLASS.produces, r2.getRelClass());
|
||||
assertTrue(r2.getValidated());
|
||||
assertEquals("2020-01-01", r2.getValidationDate());
|
||||
|
||||
|
@ -597,15 +595,15 @@ class MappersTest {
|
|||
|
||||
assertEquals(s.getId(), r1.getSource());
|
||||
assertEquals("50|doi_________::b453e7b4b2130ace57ff0c3db470a982", r1.getTarget());
|
||||
assertEquals(ModelConstants.RESULT_RESULT, r1.getRelType());
|
||||
assertEquals(ModelConstants.RELATIONSHIP, r1.getSubRelType());
|
||||
assertEquals(ModelConstants.IS_REFERENCED_BY, r1.getRelClass());
|
||||
assertEquals(Relation.RELTYPE.resultResult, r1.getRelType());
|
||||
assertEquals(Relation.SUBRELTYPE.relationship, r1.getSubRelType());
|
||||
assertEquals(Relation.RELCLASS.IsReferencedBy, r1.getRelClass());
|
||||
|
||||
assertEquals(s.getId(), r2.getTarget());
|
||||
assertEquals("50|doi_________::b453e7b4b2130ace57ff0c3db470a982", r2.getSource());
|
||||
assertEquals(ModelConstants.RESULT_RESULT, r2.getRelType());
|
||||
assertEquals(ModelConstants.RELATIONSHIP, r2.getSubRelType());
|
||||
assertEquals(ModelConstants.REFERENCES, r2.getRelClass());
|
||||
assertEquals(Relation.RELTYPE.resultResult, r2.getRelType());
|
||||
assertEquals(Relation.SUBRELTYPE.relationship, r2.getSubRelType());
|
||||
assertEquals(Relation.RELCLASS.References, r2.getRelClass());
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -246,10 +246,10 @@ class MigrateDbEntitiesApplicationTest {
|
|||
assertEquals(ModelConstants.DATASOURCE_ORGANIZATION, r1.getRelType());
|
||||
assertEquals(ModelConstants.DATASOURCE_ORGANIZATION, r2.getRelType());
|
||||
|
||||
assertEquals(ModelConstants.PROVISION, r1.getSubRelType());
|
||||
assertEquals(ModelConstants.PROVISION, r2.getSubRelType());
|
||||
assertEquals(Relation.SUBRELTYPE.provision, r1.getSubRelType());
|
||||
assertEquals(Relation.SUBRELTYPE.provision, r2.getSubRelType());
|
||||
|
||||
assertEquals(ModelConstants.IS_PROVIDED_BY, r1.getRelClass());
|
||||
assertEquals(Relation.RELCLASS.isProvidedBy, r1.getRelClass());
|
||||
assertEquals(ModelConstants.PROVIDES, r2.getRelClass());
|
||||
}
|
||||
|
||||
|
@ -272,7 +272,7 @@ class MigrateDbEntitiesApplicationTest {
|
|||
assertValidId(rel.getProvenance().get(0).getCollectedfrom().getKey());
|
||||
|
||||
assertEquals(ModelConstants.PROJECT_ORGANIZATION, rel.getRelType());
|
||||
assertEquals(ModelConstants.PARTICIPATION, rel.getSubRelType());
|
||||
assertEquals(Relation.SUBRELTYPE.participation, rel.getSubRelType());
|
||||
assertEquals(ModelConstants.IS_PARTICIPANT, rel.getRelClass());
|
||||
|
||||
assertNotNull(rel.getProperties());
|
||||
|
|
|
@ -3,11 +3,9 @@ package eu.dnetlib.dhp.oa.provision;
|
|||
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Optional;
|
||||
import java.util.PriorityQueue;
|
||||
import java.util.Set;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.StreamSupport;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
@ -80,10 +78,15 @@ public class PrepareRelationsJob {
|
|||
.orElse(DEFAULT_NUM_PARTITIONS);
|
||||
log.info("relPartitions: {}", relPartitions);
|
||||
|
||||
Set<String> relationFilter = Optional
|
||||
Set<Relation.RELCLASS> relationFilter = Optional
|
||||
.ofNullable(parser.get("relationFilter"))
|
||||
.map(String::toLowerCase)
|
||||
.map(s -> Sets.newHashSet(Splitter.on(",").split(s)))
|
||||
.map(s -> Sets.newHashSet(
|
||||
StreamSupport.stream(
|
||||
Splitter.on(",").split(s).spliterator(), false)
|
||||
.map(Relation.RELCLASS::valueOf)
|
||||
.collect(Collectors.toList())
|
||||
) )
|
||||
.orElse(new HashSet<>());
|
||||
log.info("relationFilter: {}", relationFilter);
|
||||
|
||||
|
@ -128,11 +131,11 @@ public class PrepareRelationsJob {
|
|||
* @param relPartitions number of partitions for the output RDD
|
||||
*/
|
||||
private static void prepareRelationsRDD(SparkSession spark, String inputRelationsPath, String outputPath,
|
||||
Set<String> relationFilter, int sourceMaxRelations, int targetMaxRelations, int relPartitions) {
|
||||
Set<Relation.RELCLASS> relationFilter, int sourceMaxRelations, int targetMaxRelations, int relPartitions) {
|
||||
|
||||
JavaRDD<Relation> rels = readPathRelationRDD(spark, inputRelationsPath)
|
||||
.filter(rel -> !(rel.getSource().startsWith("unresolved") || rel.getTarget().startsWith("unresolved")))
|
||||
.filter(rel -> !relationFilter.contains(StringUtils.lowerCase(rel.getRelClass())));
|
||||
.filter(rel -> !relationFilter.contains(rel.getRelClass()));
|
||||
|
||||
JavaRDD<Relation> pruned = pruneRels(
|
||||
pruneRels(
|
||||
|
|
|
@ -13,21 +13,23 @@ import eu.dnetlib.dhp.schema.oaf.Relation;
|
|||
|
||||
public class RelationComparator implements Comparator<Relation> {
|
||||
|
||||
private static final Map<String, Integer> weights = Maps.newHashMap();
|
||||
private static final Map<Relation.SUBRELTYPE, Integer> weights = Maps.newHashMap();
|
||||
|
||||
|
||||
static {
|
||||
weights.put(ModelConstants.OUTCOME, 0);
|
||||
weights.put(ModelConstants.SUPPLEMENT, 1);
|
||||
weights.put(ModelConstants.REVIEW, 2);
|
||||
weights.put(ModelConstants.CITATION, 3);
|
||||
weights.put(ModelConstants.AFFILIATION, 4);
|
||||
weights.put(ModelConstants.RELATIONSHIP, 5);
|
||||
weights.put(ModelConstants.PUBLICATION_DATASET, 6);
|
||||
weights.put(ModelConstants.SIMILARITY, 7);
|
||||
weights.put(Relation.SUBRELTYPE.outcome, 0);
|
||||
weights.put(Relation.SUBRELTYPE.supplement, 1);
|
||||
weights.put(Relation.SUBRELTYPE.review, 2);
|
||||
weights.put(Relation.SUBRELTYPE.citation, 3);
|
||||
weights.put(Relation.SUBRELTYPE.affiliation, 4);
|
||||
//TODO CLAUDIO PLEASE CHECK IF the SUBSTITUTION OF publicationDataset WITH RELATIONSHIPS IS OK
|
||||
// weights.put(Relation.SUBRELTYPE.relationship, 5);
|
||||
weights.put(Relation.SUBRELTYPE.relationship, 6);
|
||||
weights.put(Relation.SUBRELTYPE.similarity, 7);
|
||||
|
||||
weights.put(ModelConstants.PROVISION, 8);
|
||||
weights.put(ModelConstants.PARTICIPATION, 9);
|
||||
weights.put(ModelConstants.DEDUP, 10);
|
||||
weights.put(Relation.SUBRELTYPE.provision, 8);
|
||||
weights.put(Relation.SUBRELTYPE.participation, 9);
|
||||
weights.put(Relation.SUBRELTYPE.dedup, 10);
|
||||
}
|
||||
|
||||
private Integer getWeight(Relation o) {
|
||||
|
|
|
@ -14,23 +14,24 @@ import eu.dnetlib.dhp.schema.oaf.Relation;
|
|||
|
||||
public class SortableRelation extends Relation implements Comparable<SortableRelation>, Serializable {
|
||||
|
||||
private static final Map<String, Integer> weights = Maps.newHashMap();
|
||||
private static final Map<Relation.SUBRELTYPE, Integer> weights = Maps.newHashMap();
|
||||
|
||||
//TODO version and part missing why publication is there?
|
||||
|
||||
static {
|
||||
weights.put(ModelConstants.OUTCOME, 0);
|
||||
weights.put(ModelConstants.SUPPLEMENT, 1);
|
||||
weights.put(ModelConstants.REVIEW, 2);
|
||||
weights.put(ModelConstants.CITATION, 3);
|
||||
weights.put(ModelConstants.AFFILIATION, 4);
|
||||
weights.put(ModelConstants.RELATIONSHIP, 5);
|
||||
weights.put(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID, 6);
|
||||
weights.put(ModelConstants.SIMILARITY, 7);
|
||||
weights.put(Relation.SUBRELTYPE.outcome, 0);
|
||||
weights.put(Relation.SUBRELTYPE.supplement, 1);
|
||||
weights.put(Relation.SUBRELTYPE.review, 2);
|
||||
weights.put(Relation.SUBRELTYPE.citation, 3);
|
||||
weights.put(Relation.SUBRELTYPE.affiliation, 4);
|
||||
weights.put(Relation.SUBRELTYPE.relationship, 5);
|
||||
//weights.put(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID, 6);
|
||||
weights.put(Relation.SUBRELTYPE.similarity, 7);
|
||||
|
||||
weights.put(ModelConstants.PROVISION, 8);
|
||||
weights.put(ModelConstants.PARTICIPATION, 9);
|
||||
weights.put(ModelConstants.DEDUP, 10);
|
||||
weights.put(Relation.SUBRELTYPE.provision, 8);
|
||||
weights.put(Relation.SUBRELTYPE.participation, 9);
|
||||
weights.put(Relation.SUBRELTYPE.dedup, 10);
|
||||
}
|
||||
|
||||
private static final long serialVersionUID = 34753984579L;
|
||||
|
||||
private String groupingKey;
|
||||
|
|
|
@ -17,19 +17,19 @@ public class SortableRelationKey implements Comparable<SortableRelationKey>, Ser
|
|||
private static final Map<String, Integer> weights = Maps.newHashMap();
|
||||
|
||||
static {
|
||||
weights.put(ModelConstants.PARTICIPATION, 0);
|
||||
weights.put(ModelConstants.OUTCOME, 1);
|
||||
weights.put(ModelConstants.AFFILIATION, 2);
|
||||
weights.put(ModelConstants.DEDUP, 3);
|
||||
weights.put(Relation.SUBRELTYPE.participation, 0);
|
||||
weights.put(Relation.SUBRELTYPE.outcome, 1);
|
||||
weights.put(Relation.SUBRELTYPE.affiliation, 2);
|
||||
weights.put(Relation.SUBRELTYPE.dedup, 3);
|
||||
weights.put(ModelConstants.PUBLICATION_DATASET, 4);
|
||||
weights.put(ModelConstants.SUPPLEMENT, 5);
|
||||
weights.put(ModelConstants.REVIEW, 6);
|
||||
weights.put(ModelConstants.RELATIONSHIP, 7);
|
||||
weights.put(Relation.SUBRELTYPE.supplement, 5);
|
||||
weights.put(Relation.SUBRELTYPE.review, 6);
|
||||
weights.put(Relation.SUBRELTYPE.relationship, 7);
|
||||
weights.put(ModelConstants.PART, 8);
|
||||
weights.put(ModelConstants.PROVISION, 9);
|
||||
weights.put(Relation.SUBRELTYPE.provision, 9);
|
||||
weights.put(ModelConstants.VERSION, 10);
|
||||
weights.put(ModelConstants.SIMILARITY, 11);
|
||||
weights.put(ModelConstants.CITATION, 12);
|
||||
weights.put(Relation.SUBRELTYPE.similarity, 11);
|
||||
weights.put(Relation.SUBRELTYPE.citation, 12);
|
||||
}
|
||||
|
||||
private static final long serialVersionUID = 3232323;
|
||||
|
|
|
@ -1459,7 +1459,7 @@ public class XmlRecordFactory implements Serializable {
|
|||
}
|
||||
|
||||
private boolean isDuplicate(final RelatedEntityWrapper link) {
|
||||
return ModelConstants.DEDUP.equalsIgnoreCase(link.getRelation().getSubRelType());
|
||||
return Relation.SUBRELTYPE.dedup.equalsIgnoreCase(link.getRelation().getSubRelType());
|
||||
}
|
||||
|
||||
private List<String> listExtraInfo(final Entity entity) {
|
||||
|
|
Loading…
Reference in New Issue