From c2998a14e8e998cc496a8e8db803e48bafdb43e4 Mon Sep 17 00:00:00 2001 From: Serafeim Chatzopoulos Date: Thu, 6 Jul 2023 20:28:16 +0300 Subject: [PATCH] Add basic tests for affiliation relations --- .../PrepareAffiliationRelationsTest.java | 108 +++++++++--------- 1 file changed, 52 insertions(+), 56 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java index eba53ccdb..c76fcf6a9 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java @@ -5,8 +5,11 @@ import static org.junit.jupiter.api.Assertions.*; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.List; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import org.apache.commons.io.FileUtils; import org.apache.hadoop.io.Text; import org.apache.spark.SparkConf; @@ -26,7 +29,6 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.action.AtomicAction; -import eu.dnetlib.dhp.schema.oaf.Result; public class PrepareAffiliationRelationsTest { @@ -35,6 +37,7 @@ public class PrepareAffiliationRelationsTest { private static SparkSession spark; private static Path workingDir; + private static final String ID_PREFIX = "50|doi_________::"; private static final Logger log = LoggerFactory .getLogger(PrepareAffiliationRelationsTest.class); @@ -69,71 +72,64 @@ public class PrepareAffiliationRelationsTest { @Test void testMatch() throws Exception { + String affiliationRelationsPath = getClass() .getResource("/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror.json") .getPath(); + String outputPath = workingDir.toString() + "/actionSet"; + PrepareAffiliationRelations .main( new String[] { - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-inputPath", - affiliationRelationsPath, - "-outputPath", - workingDir.toString() + "/actionSet" + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-inputPath", affiliationRelationsPath, + "-outputPath", outputPath }); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); -// JavaRDD tmp = sc -// .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) -// .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) -// .map(aa -> ((Result) aa.getPayload())); -// -// assertEquals(4, tmp.count()); -// -// Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Result.class)); -// verificationDataset.createOrReplaceTempView("result"); -// -// Dataset execVerification = spark -// .sql( -// "Select p.id oaid, mes.id, mUnit.value from result p " + -// "lateral view explode(measures) m as mes " + -// "lateral view explode(mes.unit) u as mUnit "); -// -// Assertions.assertEquals(12, execVerification.count()); -// Assertions -// .assertEquals( -// "6.63451994567e-09", execVerification -// .filter( -// "oaid='50|arXiv_dedup_::4a2d5fd8d71daec016c176ec71d957b1' " + -// "and id = 'influence'") -// .select("value") -// .collectAsList() -// .get(0) -// .getString(0)); -// Assertions -// .assertEquals( -// "0.348694533145", execVerification -// .filter( -// "oaid='50|arXiv_dedup_::4a2d5fd8d71daec016c176ec71d957b1' " + -// "and id = 'popularity_alt'") -// .select("value") -// .collectAsList() -// .get(0) -// .getString(0)); -// Assertions -// .assertEquals( -// "2.16094680115e-09", execVerification -// .filter( -// "oaid='50|arXiv_dedup_::4a2d5fd8d71daec016c176ec71d957b1' " + -// "and id = 'popularity'") -// .select("value") -// .collectAsList() -// .get(0) -// .getString(0)); -// + JavaRDD tmp = sc + .sequenceFile(outputPath, Text.class, Text.class) + .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) + .map(aa -> ((Relation) aa.getPayload())); + + for (Relation r : tmp.collect()) { + System.out.println( + r.getSource() + "\t" + r.getTarget() + "\t" + r.getRelType() + "\t" + r.getRelClass() + "\t" + r.getSubRelType() + "\t" + r.getValidationDate() + "\t" + r.getDataInfo().getTrust() + "\t" + r.getDataInfo().getInferred() + ); + } + // count the number of relations + assertEquals(16, tmp.count()); + + Dataset dataset = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class)); + dataset.createOrReplaceTempView("result"); + + Dataset execVerification = spark.sql("select r.relType, r.relClass, r.source, r.target, r.dataInfo.trust from result r"); + + // verify that we have equal number of bi-directional relations + Assertions.assertEquals(8, execVerification + .filter( + "relClass='" + ModelConstants.HAS_AUTHOR_INSTITUTION +"'") + .collectAsList() + .size()); + + Assertions.assertEquals(8, execVerification + .filter( + "relClass='" + ModelConstants.IS_AUTHOR_INSTITUTION_OF +"'") + .collectAsList() + .size()); + + // check confidence value of a specific relation + String sourceDOI = "10.1105/tpc.8.3.343"; + + final String sourceOpenaireId = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", sourceDOI)); + + Assertions.assertEquals("0.7071067812", execVerification + .filter( + "source='" + sourceOpenaireId +"'") + .collectAsList().get(0).getString(4)); + } }