From 38dfebfbe6703f3200fb7f55845e56a2d16dbdb7 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Wed, 19 Jul 2023 14:18:56 +0200 Subject: [PATCH 1/7] Disable MdStoreClientTest test as it requires a local mongodb running and it does not perform any assertions --- .../src/test/java/eu/dnetlib/dhp/common/MdStoreClientTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/MdStoreClientTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/MdStoreClientTest.java index f38d04979..f87f6e313 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/MdStoreClientTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/MdStoreClientTest.java @@ -15,7 +15,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; public class MdStoreClientTest { - @Test + // @Test public void testMongoCollection() throws IOException { final MdstoreClient client = new MdstoreClient("mongodb://localhost:27017", "mdstore"); From e47ed1fdb2003cad9fcbaa1e48a1177879dc22e1 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Wed, 19 Jul 2023 14:21:40 +0200 Subject: [PATCH 2/7] Disable DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES in json mapper so that tests do not fail when they encounter unmapped properties --- .../promote/PromoteActionPayloadForGraphTableJob.java | 4 +++- .../partition/PartitionActionSetsByPayloadTypeJobTest.java | 4 +++- .../promote/PromoteActionPayloadForGraphTableJobTest.java | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java index c5f252c97..7b024bea8 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java +++ 
b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java @@ -20,6 +20,7 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.exc.UnrecognizedPropertyException; @@ -33,7 +34,8 @@ import eu.dnetlib.dhp.schema.oaf.*; public class PromoteActionPayloadForGraphTableJob { private static final Logger logger = LoggerFactory.getLogger(PromoteActionPayloadForGraphTableJob.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java index 62eec13d5..de74350f3 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java @@ -31,6 +31,7 @@ import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; +import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.actionmanager.ISClient; @@ -46,7 +47,8 @@ public class PartitionActionSetsByPayloadTypeJobTest { private static Configuration configuration; private static SparkSession spark; - private static final 
ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); private static final StructType ATOMIC_ACTION_SCHEMA = StructType$.MODULE$ .apply( diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java index df9202ed8..aa8c00786 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java @@ -25,6 +25,7 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; +import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.common.ModelSupport; @@ -41,7 +42,8 @@ public class PromoteActionPayloadForGraphTableJobTest { private Path inputActionPayloadRootDir; private Path outputDir; - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); @BeforeAll public static void beforeAll() { From dba34505de8d3d8704529c66afe4acdcac8944fc Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Wed, 19 Jul 2023 14:24:52 +0200 Subject: [PATCH 3/7] Fix SparkStatsTest bug where parquet tables were incorrectly read as text files leading to unpredictable count() values --- .../dnetlib/dhp/oa/dedup/SparkStatsTest.java | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff 
--git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java index b33b627e7..07e993444 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java @@ -145,34 +145,34 @@ public class SparkStatsTest implements Serializable { long orgs_blocks = spark .read() - .textFile(testOutputBasePath + "/" + testActionSetId + "/organization_blockstats") + .load(testOutputBasePath + "/" + testActionSetId + "/organization_blockstats") .count(); long pubs_blocks = spark .read() - .textFile(testOutputBasePath + "/" + testActionSetId + "/publication_blockstats") + .load(testOutputBasePath + "/" + testActionSetId + "/publication_blockstats") .count(); long sw_blocks = spark .read() - .textFile(testOutputBasePath + "/" + testActionSetId + "/software_blockstats") + .load(testOutputBasePath + "/" + testActionSetId + "/software_blockstats") .count(); long ds_blocks = spark .read() - .textFile(testOutputBasePath + "/" + testActionSetId + "/dataset_blockstats") + .load(testOutputBasePath + "/" + testActionSetId + "/dataset_blockstats") .count(); long orp_blocks = spark .read() - .textFile(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_blockstats") + .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_blockstats") .count(); - assertEquals(480, orgs_blocks); - assertEquals(295, pubs_blocks); - assertEquals(122, sw_blocks); - assertEquals(191, ds_blocks); - assertEquals(178, orp_blocks); + assertEquals(414, orgs_blocks); + assertEquals(187, pubs_blocks); + assertEquals(128, sw_blocks); + assertEquals(192, ds_blocks); + assertEquals(194, orp_blocks); } @AfterAll From 0210a14e436f3c83ac610e721237f7c9c73f3255 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Thu, 20 Jul 2023 
23:45:57 +0200 Subject: [PATCH 4/7] Ignore timestamp differences in PromoteActionPayloadForGraphTableJobTest --- .../promote/PromoteActionPayloadForGraphTableJobTest.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java index aa8c00786..83153ae16 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java @@ -156,6 +156,10 @@ public class PromoteActionPayloadForGraphTableJobTest { List actualOutputRows = readGraphTableFromJobOutput(outputGraphTableDir.toString(), rowClazz) .collectAsList() .stream() + .map(s -> { + s.setLastupdatetimestamp(0L); + return s; + }) .sorted(Comparator.comparingInt(Object::hashCode)) .collect(Collectors.toList()); String expectedOutputGraphTableJsonDumpPath = resultFileLocation(strategy, rowClazz, actionPayloadClazz); @@ -168,6 +172,10 @@ public class PromoteActionPayloadForGraphTableJobTest { expectedOutputGraphTableJsonDumpFile.toString(), rowClazz) .collectAsList() .stream() + .map(s -> { + s.setLastupdatetimestamp(0L); + return s; + }) .sorted(Comparator.comparingInt(Object::hashCode)) .collect(Collectors.toList()); assertIterableEquals(expectedOutputRows, actualOutputRows); From 5e15f20e6e5b9961d842fcdf4590a61f84a31d2b Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Fri, 21 Jul 2023 00:46:54 +0200 Subject: [PATCH 5/7] Fix entityMerger that was excluding the authors of the first entity in the list to merge --- .../main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index 7637cde93..60669106a 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -110,6 +110,10 @@ public class DedupRecordFactory { // set authors and date if (ModelSupport.isSubClass(entity, Result.class)) { + Optional + .ofNullable(((Result) entity).getAuthor()) + .ifPresent(a -> authors.add(a)); + ((Result) entity).setAuthor(AuthorMerger.merge(authors)); } From 54c1eacef18d7759968abb89b84d870abbd77f47 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Fri, 21 Jul 2023 10:42:24 +0200 Subject: [PATCH 6/7] SparkJobTest was failing because testing workingdir was not cleaned up after each test --- .../SparkJobTest.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java index 2e75c75ad..517a20cd9 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java @@ -11,8 +11,10 @@ import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ 
-58,6 +60,11 @@ public class SparkJobTest { .getOrCreate(); } + @AfterEach + public void afterEach() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + } + @AfterAll public static void afterAll() throws IOException { FileUtils.deleteDirectory(workingDir.toFile()); @@ -91,16 +98,19 @@ public class SparkJobTest { readPath(spark, leavesPath, Leaves.class) .write() + .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(workingDir.toString() + "/leavesInput"); readPath(spark, resultOrgPath, KeyValueSet.class) .write() + .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(workingDir.toString() + "/orgsInput"); readPath(spark, projectOrgPath, KeyValueSet.class) .write() + .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(workingDir.toString() + "/projectInput"); @@ -369,16 +379,19 @@ public class SparkJobTest { readPath(spark, leavesPath, Leaves.class) .write() + .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(workingDir.toString() + "/leavesInput"); readPath(spark, resultOrgPath, KeyValueSet.class) .write() + .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(workingDir.toString() + "/orgsInput"); readPath(spark, projectOrgPath, KeyValueSet.class) .write() + .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(workingDir.toString() + "/projectInput"); @@ -649,16 +662,19 @@ public class SparkJobTest { readPath(spark, leavesPath, Leaves.class) .write() + .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(workingDir.toString() + "/leavesInput"); readPath(spark, resultOrgPath, KeyValueSet.class) .write() + .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(workingDir.toString() + "/orgsInput"); readPath(spark, projectOrgPath, KeyValueSet.class) .write() + .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(workingDir.toString() + "/projectInput"); From f03153823a6c710f9cb56280a717fd041b325334 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: 
Fri, 21 Jul 2023 10:48:28 +0200 Subject: [PATCH 7/7] Update testCitationRelations number of expected citations according to changes made in 0559d8b4 (monodirectional citations) --- .../eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala index e0ef0e65c..fbf6f72c0 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala @@ -149,7 +149,7 @@ class CrossrefMappingTest { assertNotNull(relationList) assertFalse(relationList.isEmpty) - assertEquals(doisReference.size * 2, relationList.size) + assertEquals(doisReference.size, relationList.size) mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) relationList.foreach(p => println(mapper.writeValueAsString(p)))