From a24b9f8268a3e36a5032d40a103793677ab1dd8b Mon Sep 17 00:00:00 2001
From: Claudio Atzori
Date: Thu, 18 Nov 2021 17:12:02 +0100
Subject: [PATCH] [dedup] trivial refactoring

---
Review notes (text between the three-dash line and the first "diff --git" is
not part of the commit):

 * Purpose: the private createSimRel(source, target, entity) helper that was
   duplicated in SparkCreateSimRels and SparkWhitelistSimRels is removed from
   both classes and added once to DedupUtility as a public static method; both
   call sites now invoke DedupUtility.createSimRel(...).
 * createSimRel builds a Relation with the given source/target, subRelType
   "dedupSimilarity", an empty DataInfo, and a relType chosen from the entity
   name ("result" or "organization"); any other entity name raises
   IllegalArgumentException("unmanaged entity type: " + entity).
 * NOTE(review): the moved method replaces the literals "isSimilarTo",
   "resultResult" and "organizationOrganization" with
   ModelConstants.IS_SIMILAR_TO, ModelConstants.RESULT_RESULT and
   ModelConstants.ORG_ORG_RELTYPE. Presumably the constants hold the same
   values; confirm against ModelConstants, otherwise this is not a pure
   refactoring.
 * NOTE(review): SparkCreateSimRels gains an import of ModelConstants, but no
   line added to that class by this patch references it; it looks unused.
   The DataInfo import may also have become unused in both Spark classes;
   verify against the full files.
 * NOTE(review): this copy of the patch was rebuilt from a whitespace-collapsed
   paste. Indentation (tabs assumed) and blank context lines were inferred from
   the hunk line counts, and the generic type arguments in the
   SparkWhitelistSimRels hunk (Dataset<Relation>,
   MapFunction<Tuple2<String, String>, Relation>) were restored from the way
   the values are used. Apply with "git apply --ignore-whitespace" if the
   context does not match, and confirm the generics against the target file.

 .../eu/dnetlib/dhp/oa/dedup/DedupUtility.java | 24 +++++++++++++++++++
 .../dhp/oa/dedup/SparkCreateSimRels.java      | 23 ++----------------
 .../dhp/oa/dedup/SparkWhitelistSimRels.java   | 23 ++----------------
 3 files changed, 28 insertions(+), 42 deletions(-)

diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java
index d79d24653..aeb485768 100644
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java
@@ -14,6 +14,9 @@ import org.xml.sax.SAXException;
 
 import com.google.common.collect.Sets;
 
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.DataInfo;
+import eu.dnetlib.dhp.schema.oaf.Relation;
 import eu.dnetlib.dhp.utils.ISLookupClientFactory;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
 import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@@ -152,4 +155,25 @@ public class DedupUtility {
 		return o1.compareTo(o2);
 	}
 
+	public static Relation createSimRel(String source, String target, String entity) {
+		final Relation r = new Relation();
+		r.setSource(source);
+		r.setTarget(target);
+		r.setSubRelType("dedupSimilarity");
+		r.setRelClass(ModelConstants.IS_SIMILAR_TO);
+		r.setDataInfo(new DataInfo());
+
+		switch (entity) {
+			case "result":
+				r.setRelType(ModelConstants.RESULT_RESULT);
+				break;
+			case "organization":
+				r.setRelType(ModelConstants.ORG_ORG_RELTYPE);
+				break;
+			default:
+				throw new IllegalArgumentException("unmanaged entity type: " + entity);
+		}
+		return r;
+	}
+
 }
diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java
index f89f634b5..3aa8f241d 100644
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java
@@ -20,6 +20,7 @@ import org.xml.sax.SAXException;
 
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.oa.dedup.model.Block;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.DataInfo;
 import eu.dnetlib.dhp.schema.oaf.Relation;
 import eu.dnetlib.dhp.utils.ISLookupClientFactory;
@@ -102,7 +103,7 @@ public class SparkCreateSimRels extends AbstractSparkAction {
 				.createDataset(
 					Deduper
 						.computeRelations(sc, blocks, dedupConf)
-						.map(t -> createSimRel(t._1(), t._2(), entity))
+						.map(t -> DedupUtility.createSimRel(t._1(), t._2(), entity))
 						.repartition(numPartitions)
 						.rdd(),
 					Encoders.bean(Relation.class));
@@ -111,24 +112,4 @@ public class SparkCreateSimRels extends AbstractSparkAction {
 		}
 	}
 
-	private Relation createSimRel(String source, String target, String entity) {
-		final Relation r = new Relation();
-		r.setSource(source);
-		r.setTarget(target);
-		r.setSubRelType("dedupSimilarity");
-		r.setRelClass("isSimilarTo");
-		r.setDataInfo(new DataInfo());
-
-		switch (entity) {
-			case "result":
-				r.setRelType("resultResult");
-				break;
-			case "organization":
-				r.setRelType("organizationOrganization");
-				break;
-			default:
-				throw new IllegalArgumentException("unmanaged entity type: " + entity);
-		}
-		return r;
-	}
 }
diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java
index 7d91e47cc..1cfac9a27 100644
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java
@@ -124,31 +124,12 @@ public class SparkWhitelistSimRels extends AbstractSparkAction {
 
 			Dataset<Relation> whiteListSimRels = whiteListRels2
 				.map(
-					(MapFunction<Tuple2<String, String>, Relation>) r -> createSimRel(r._1(), r._2(), entity),
+					(MapFunction<Tuple2<String, String>, Relation>) r -> DedupUtility
+						.createSimRel(r._1(), r._2(), entity),
 					Encoders.bean(Relation.class));
 
 			saveParquet(whiteListSimRels, outputPath, SaveMode.Append);
 		}
 	}
 
-	private Relation createSimRel(String source, String target, String entity) {
-		final Relation r = new Relation();
-		r.setSource(source);
-		r.setTarget(target);
-		r.setSubRelType("dedupSimilarity");
-		r.setRelClass("isSimilarTo");
-		r.setDataInfo(new DataInfo());
-
-		switch (entity) {
-			case "result":
-				r.setRelType("resultResult");
-				break;
-			case "organization":
-				r.setRelType("organizationOrganization");
-				break;
-			default:
-				throw new IllegalArgumentException("unmanaged entity type: " + entity);
-		}
-		return r;
-	}
 }