From 4d8339614b5e1240243e4a139d494e1988f6746e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 4 Oct 2022 14:29:47 +0200 Subject: [PATCH] Revert "[BipFinder] Fixed issue for wrong escaped char in doi" This reverts commit 188f25eefae15d23eb92dbd977d6e41fd78af157. --- .../createunresolvedentities/PrepareBipFinder.java | 6 ++---- .../actionmanager/createunresolvedentities/PrepareTest.java | 5 +---- .../dhp/actionmanager/createunresolvedentities/bip/bip.json | 3 +-- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java index a4f5c22bc..80573c71a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java @@ -11,7 +11,6 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; -import com.fasterxml.jackson.core.JsonParser; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -41,6 +40,7 @@ import eu.dnetlib.dhp.utils.DHPUtils; public class PrepareBipFinder implements Serializable { private static final Logger log = LoggerFactory.getLogger(PrepareBipFinder.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static void main(String[] args) throws Exception { @@ -82,11 +82,9 @@ public class PrepareBipFinder implements Serializable { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - ObjectMapper mapper = new ObjectMapper() - .configure(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER, true); JavaRDD bipDeserializeJavaRDD = sc .textFile(inputPath) - .map(item -> mapper.readValue(item, BipDeserialize.class)); + .map(item -> OBJECT_MAPPER.readValue(item, BipDeserialize.class)); spark .createDataset(bipDeserializeJavaRDD.flatMap(entry -> entry.keySet().stream().map(key -> { diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java index d0ce69043..cc8108bde 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java @@ -88,7 +88,7 @@ public class PrepareTest { .textFile(workingDir.toString() + "/work/bip") .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); - Assertions.assertEquals(87, tmp.count()); + Assertions.assertEquals(86, tmp.count()); String doi1 = "unresolved::10.0000/096020199389707::doi"; @@ -151,9 +151,6 @@ public class PrepareTest { Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi2)).count()); Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi2)).collect().get(0).getInstance().size()); - tmp.filter(r -> r.getId().startsWith("unresolved::10.2111/1551-5028(2004)057")) - .foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); - } @Test diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json index 3a077ab5c..03cef4be1 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json @@ -83,5 +83,4 @@ {"10.0000/hoplos.v4i7.41295": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} {"10.0000/hoplos.v4i7.42830": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} {"10.0000/hoplos.v4i7.42861": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} -{"10.0000/hoplos.v4i7.43096": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} -{"10.2111/1551-5028(2004)057\[0539:sdsocg\]2.0.co;2": [{"id":"influence", "unit":[{"key":"score","value":"6.3290875E-9"},{"key":"class","value":"C"}]}, {"id":"popularity", "unit":[{"key":"score","value":"6.576763E-9"},{"key":"class","value":"C"}]}, {"id":"influence_alt", "unit":[{"key":"score","value":"11"},{"key":"class","value":"C"}]}, {"id":"popularity_alt", "unit":[{"key":"score","value":"1.0142108"},{"key":"class","value":"C"}]}, {"id":"impulse", "unit":[{"key":"score","value":"1"},{"key":"class","value":"C"}]}]} \ No newline at end of file +{"10.0000/hoplos.v4i7.43096": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} \ No newline at end of file