From 2ebb1459a9c01eeda8d81865c75585e361cc5426 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Wed, 28 Sep 2022 14:36:57 +0300 Subject: [PATCH 1/7] Fixed type in no_downloads --- .../scripts/step16-createIndicatorsTables.sql | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 417ed6e4e7..1bda076295 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -454,16 +454,16 @@ FROM publication_datasources pd compute stats indi_pub_hybrid_oa_with_cc; create table indi_pub_downloads stored as parquet as -SELECT result_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats +SELECT result_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats join publication on result_id=id where downloads>0 GROUP BY result_id -order by no_dowloads desc; +order by no_downloads desc; compute stats indi_pub_downloads; create table indi_pub_downloads_datasource stored as parquet as -SELECT result_id, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats +SELECT result_id, repository_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats join publication on result_id=id where downloads>0 GROUP BY result_id, repository_id @@ -472,7 +472,7 @@ order by result_id; compute stats indi_pub_downloads_datasource; create table indi_pub_downloads_year stored as parquet as -SELECT result_id, substring(us.`date`, 1,4) as `year`, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us +SELECT result_id, substring(us.`date`, 1,4) as `year`, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats us join publication on result_id=id where downloads>0 GROUP BY result_id, `year` order by `year` asc; @@ -480,7 +480,7 @@ order by `year` asc; compute stats indi_pub_downloads_year; create table indi_pub_downloads_datasource_year stored as parquet as -SELECT result_id, substring(us.`date`, 1,4) as `year`, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us +SELECT result_id, substring(us.`date`, 1,4) as `year`, repository_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats us join publication on result_id=id where downloads>0 GROUP BY result_id, repository_id, `year` From bdc46e3eaab57a926977d162131d78933566c08b Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Wed, 28 Sep 2022 14:59:08 +0300 Subject: [PATCH 2/7] Remove denormalization of results to fix downloads numbers in monitor --- .../oozie_app/scripts/step20-createMonitorDB.sql | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 290acbf9fc..2505c3a341 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -39,7 +39,6 @@ create table TARGET.result stored as parquet as 'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University 'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg 'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII) - 'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr 'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- Unviersity of Warsaw 'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly @@ -224,18 +223,3 @@ create table TARGET.indi_result_with_pid stored as parquet as select * from SOUR --create table TARGET.indi_software_gold_oa stored as parquet as select * from SOURCE.indi_software_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); --compute stats TARGET.indi_software_gold_oa; ---denorm -alter table TARGET.result rename to TARGET.res_tmp; - -create table TARGET.result_denorm stored as parquet as - select distinct r.*, rp.project, p.acronym as pacronym, p.title as ptitle, p.funder as pfunder, p.funding_lvl0 as pfunding_lvl0, rd.datasource, d.name as dname, d.type as dtype - from TARGET.res_tmp r - left outer join TARGET.result_projects rp on rp.id=r.id - left outer join TARGET.result_datasources rd on rd.id=r.id - left outer join TARGET.project p on p.id=rp.project - left outer join TARGET.datasource d on d.id=rd.datasource; -compute stats TARGET.result_denorm; - -alter table TARGET.result_denorm rename to TARGET.result; -drop table TARGET.res_tmp; ---- done! \ No newline at end of file From 2c0c3f18064da57d31f88639b41f94348a404b6e Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Wed, 28 Sep 2022 19:33:24 +0300 Subject: [PATCH 3/7] Cast amount to float for table result_apcs --- .../dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql index 04c7f83b9f..86ead4a2c6 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql @@ -42,7 +42,7 @@ join ${stats_db_name}.result res on res.id=r.id; create table ${stats_db_name}.result_apc as select r.id, r.amount, r.currency from ( - select substr(r.id, 4) as id, inst.processingchargeamount.value as amount, inst.processingchargecurrency.value as currency + select substr(r.id, 4) as id, cast(inst.processingchargeamount.value as float) as amount, inst.processingchargecurrency.value as currency from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst) r join ${stats_db_name}.result res on res.id=r.id where r.amount is not null; From 188f25eefae15d23eb92dbd977d6e41fd78af157 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 3 Oct 2022 12:42:52 +0200 Subject: [PATCH 4/7] [BipFinder] Fixed issue for wrong escaped char in doi --- .../createunresolvedentities/PrepareBipFinder.java | 6 ++++-- .../actionmanager/createunresolvedentities/PrepareTest.java | 5 ++++- .../dhp/actionmanager/createunresolvedentities/bip/bip.json | 3 ++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java index 80573c71ae..a4f5c22bc0 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java @@ -11,6 +11,7 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; +import com.fasterxml.jackson.core.JsonParser; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -40,7 +41,6 @@ import eu.dnetlib.dhp.utils.DHPUtils; public class PrepareBipFinder implements Serializable { private static final Logger log = LoggerFactory.getLogger(PrepareBipFinder.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static void main(String[] args) throws Exception { @@ -82,9 +82,11 @@ public class PrepareBipFinder implements Serializable { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + ObjectMapper mapper = new ObjectMapper() + .configure(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER, true); JavaRDD bipDeserializeJavaRDD = sc .textFile(inputPath) - .map(item -> OBJECT_MAPPER.readValue(item, BipDeserialize.class)); + .map(item -> mapper.readValue(item, BipDeserialize.class)); spark .createDataset(bipDeserializeJavaRDD.flatMap(entry -> entry.keySet().stream().map(key -> { diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java index cc8108bde7..d0ce69043f 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java @@ -88,7 +88,7 @@ public class PrepareTest { .textFile(workingDir.toString() + "/work/bip") .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); - Assertions.assertEquals(86, tmp.count()); + Assertions.assertEquals(87, tmp.count()); String doi1 = "unresolved::10.0000/096020199389707::doi"; @@ -151,6 +151,9 @@ public class PrepareTest { Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi2)).count()); Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi2)).collect().get(0).getInstance().size()); + tmp.filter(r -> r.getId().startsWith("unresolved::10.2111/1551-5028(2004)057")) + .foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); + } @Test diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json index 03cef4be11..3a077ab5c2 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json @@ -83,4 +83,5 @@ {"10.0000/hoplos.v4i7.41295": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} {"10.0000/hoplos.v4i7.42830": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} {"10.0000/hoplos.v4i7.42861": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} -{"10.0000/hoplos.v4i7.43096": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} \ No newline at end of file +{"10.0000/hoplos.v4i7.43096": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} +{"10.2111/1551-5028(2004)057\[0539:sdsocg\]2.0.co;2": [{"id":"influence", "unit":[{"key":"score","value":"6.3290875E-9"},{"key":"class","value":"C"}]}, {"id":"popularity", "unit":[{"key":"score","value":"6.576763E-9"},{"key":"class","value":"C"}]}, {"id":"influence_alt", "unit":[{"key":"score","value":"11"},{"key":"class","value":"C"}]}, {"id":"popularity_alt", "unit":[{"key":"score","value":"1.0142108"},{"key":"class","value":"C"}]}, {"id":"impulse", "unit":[{"key":"score","value":"1"},{"key":"class","value":"C"}]}]} \ No newline at end of file From 28dc317350d792e75a843e8d4c9d6b2ea5735a2c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 4 Oct 2022 09:47:27 +0200 Subject: [PATCH 5/7] [BipFinder] refactoring --- .../createunresolvedentities/PrepareBipFinder.java | 4 ++-- .../actionmanager/createunresolvedentities/PrepareTest.java | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java index a4f5c22bc0..2d2633e0f6 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java @@ -11,7 +11,6 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; -import com.fasterxml.jackson.core.JsonParser; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -23,6 +22,7 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.actionmanager.bipmodel.BipDeserialize; @@ -83,7 +83,7 @@ public class PrepareBipFinder implements Serializable { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); ObjectMapper mapper = new ObjectMapper() - .configure(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER, true); + .configure(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER, true); JavaRDD bipDeserializeJavaRDD = sc .textFile(inputPath) .map(item -> mapper.readValue(item, BipDeserialize.class)); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java index d0ce69043f..6ae1f246de 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java @@ -151,8 +151,9 @@ public class PrepareTest { Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi2)).count()); Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi2)).collect().get(0).getInstance().size()); - tmp.filter(r -> r.getId().startsWith("unresolved::10.2111/1551-5028(2004)057")) - .foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); + tmp + .filter(r -> r.getId().startsWith("unresolved::10.2111/1551-5028(2004)057")) + .foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); } From 7324853a1720821fc7d2ce4f1db94cd5f9892932 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 4 Oct 2022 14:29:39 +0200 Subject: [PATCH 6/7] Revert "[BipFinder] refactoring" This reverts commit 28dc317350d792e75a843e8d4c9d6b2ea5735a2c. --- .../createunresolvedentities/PrepareBipFinder.java | 4 ++-- .../actionmanager/createunresolvedentities/PrepareTest.java | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java index 2d2633e0f6..a4f5c22bc0 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java @@ -11,6 +11,7 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; +import com.fasterxml.jackson.core.JsonParser; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -22,7 +23,6 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.actionmanager.bipmodel.BipDeserialize; @@ -83,7 +83,7 @@ public class PrepareBipFinder implements Serializable { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); ObjectMapper mapper = new ObjectMapper() - .configure(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER, true); + .configure(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER, true); JavaRDD bipDeserializeJavaRDD = sc .textFile(inputPath) .map(item -> mapper.readValue(item, BipDeserialize.class)); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java index 6ae1f246de..d0ce69043f 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java @@ -151,9 +151,8 @@ public class PrepareTest { Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi2)).count()); Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi2)).collect().get(0).getInstance().size()); - tmp - .filter(r -> r.getId().startsWith("unresolved::10.2111/1551-5028(2004)057")) - .foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); + tmp.filter(r -> r.getId().startsWith("unresolved::10.2111/1551-5028(2004)057")) + .foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); } From 4d8339614b5e1240243e4a139d494e1988f6746e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 4 Oct 2022 14:29:47 +0200 Subject: [PATCH 7/7] Revert "[BipFinder] Fixed issue for wrong escaped char in doi" This reverts commit 188f25eefae15d23eb92dbd977d6e41fd78af157. --- .../createunresolvedentities/PrepareBipFinder.java | 6 ++---- .../actionmanager/createunresolvedentities/PrepareTest.java | 5 +---- .../dhp/actionmanager/createunresolvedentities/bip/bip.json | 3 +-- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java index a4f5c22bc0..80573c71ae 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java @@ -11,7 +11,6 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; -import com.fasterxml.jackson.core.JsonParser; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -41,6 +40,7 @@ import eu.dnetlib.dhp.utils.DHPUtils; public class PrepareBipFinder implements Serializable { private static final Logger log = LoggerFactory.getLogger(PrepareBipFinder.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static void main(String[] args) throws Exception { @@ -82,11 +82,9 @@ public class PrepareBipFinder implements Serializable { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - ObjectMapper mapper = new ObjectMapper() - .configure(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER, true); JavaRDD bipDeserializeJavaRDD = sc .textFile(inputPath) - .map(item -> mapper.readValue(item, BipDeserialize.class)); + .map(item -> OBJECT_MAPPER.readValue(item, BipDeserialize.class)); spark .createDataset(bipDeserializeJavaRDD.flatMap(entry -> entry.keySet().stream().map(key -> { diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java index d0ce69043f..cc8108bde7 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java @@ -88,7 +88,7 @@ public class PrepareTest { .textFile(workingDir.toString() + "/work/bip") .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); - Assertions.assertEquals(87, tmp.count()); + Assertions.assertEquals(86, tmp.count()); String doi1 = "unresolved::10.0000/096020199389707::doi"; @@ -151,9 +151,6 @@ public class PrepareTest { Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi2)).count()); Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi2)).collect().get(0).getInstance().size()); - tmp.filter(r -> r.getId().startsWith("unresolved::10.2111/1551-5028(2004)057")) - .foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); - } @Test diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json index 3a077ab5c2..03cef4be11 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json @@ -83,5 +83,4 @@ {"10.0000/hoplos.v4i7.41295": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} {"10.0000/hoplos.v4i7.42830": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} {"10.0000/hoplos.v4i7.42861": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} -{"10.0000/hoplos.v4i7.43096": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} -{"10.2111/1551-5028(2004)057\[0539:sdsocg\]2.0.co;2": [{"id":"influence", "unit":[{"key":"score","value":"6.3290875E-9"},{"key":"class","value":"C"}]}, {"id":"popularity", "unit":[{"key":"score","value":"6.576763E-9"},{"key":"class","value":"C"}]}, {"id":"influence_alt", "unit":[{"key":"score","value":"11"},{"key":"class","value":"C"}]}, {"id":"popularity_alt", "unit":[{"key":"score","value":"1.0142108"},{"key":"class","value":"C"}]}, {"id":"impulse", "unit":[{"key":"score","value":"1"},{"key":"class","value":"C"}]}]} \ No newline at end of file +{"10.0000/hoplos.v4i7.43096": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} \ No newline at end of file